summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Otto <th1000s@posteo.net>2023-05-31 19:17:18 +0200
committerGitHub <noreply@github.com>2023-05-31 13:17:18 -0400
commit139cdb9656292edba917fd6addc0a7960cf60342 (patch)
treed598b6663e03ebd5f54d2198b00df4d00d25c893
parent65418aaa3bc064dc7ce34e6d2f231b96a5c6acb9 (diff)
Misc tab refactoring (#1424)
* Move tabs logic into utils * Re-use buffer returned by tabs::expand * Add TabCfg to configure tabs Use the String from this config for the tab replacement. This avoids creating a new String for each processed line. * Avoid unicode segmentation for each line just to remove a prefix In some code paths no prefix is removed, and in almost all other cases the prefix is just ascii. This simplifies a lot of calls. * Set default tab with to 8 Editors like vim, emacs, nano and most terminal emulators set this value as the default tab display width.
-rw-r--r--src/cli.rs2
-rw-r--r--src/config.rs4
-rw-r--r--src/handlers/grep.rs18
-rw-r--r--src/handlers/hunk.rs11
-rw-r--r--src/paint.rs38
-rw-r--r--src/subcommands/show_config.rs2
-rw-r--r--src/utils/mod.rs1
-rw-r--r--src/utils/tabs.rs64
8 files changed, 88 insertions, 52 deletions
diff --git a/src/cli.rs b/src/cli.rs
index eb1f9b85..80c73b90 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -935,7 +935,7 @@ pub struct Opt {
/// syntax highlighting.
pub syntax_theme: Option<String>,
- #[arg(long = "tabs", default_value = "4", value_name = "N")]
+ #[arg(long = "tabs", default_value = "8", value_name = "N")]
/// The number of spaces to replace tab characters with.
///
/// Use --tabs=0 to pass tab characters through directly, but note that in that case delta will
diff --git a/src/config.rs b/src/config.rs
index 10ab0eca..3d411ea4 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -120,7 +120,7 @@ pub struct Config {
pub syntax_dummy_theme: SyntaxTheme,
pub syntax_set: SyntaxSet,
pub syntax_theme: Option<SyntaxTheme>,
- pub tab_width: usize,
+ pub tab_cfg: utils::tabs::TabCfg,
pub tokenization_regex: Regex,
pub true_color: bool,
pub truncation_symbol: String,
@@ -357,7 +357,7 @@ impl From<cli::Opt> for Config {
syntax_dummy_theme: SyntaxTheme::default(),
syntax_set: opt.computed.syntax_set,
syntax_theme: opt.computed.syntax_theme,
- tab_width: opt.tab_width,
+ tab_cfg: utils::tabs::TabCfg::new(opt.tab_width),
tokenization_regex,
true_color: opt.computed.true_color,
truncation_symbol: format!("{}→{}", ansi::ANSI_SGR_REVERSE, ansi::ANSI_SGR_RESET),
diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs
index 2356f9e8..89109a69 100644
--- a/src/handlers/grep.rs
+++ b/src/handlers/grep.rs
@@ -3,14 +3,13 @@ use std::borrow::Cow;
use lazy_static::lazy_static;
use regex::Regex;
use serde::Deserialize;
-use unicode_segmentation::UnicodeSegmentation;
use crate::ansi;
use crate::delta::{State, StateMachine};
use crate::handlers::{self, ripgrep_json};
-use crate::paint::{self, expand_tabs, BgShouldFill, StyleSectionSpecifier};
+use crate::paint::{self, BgShouldFill, StyleSectionSpecifier};
use crate::style::Style;
-use crate::utils::process;
+use crate::utils::{process, tabs};
#[derive(Debug, PartialEq, Eq)]
pub struct GrepLine<'b> {
@@ -139,11 +138,8 @@ impl<'a> StateMachine<'a> {
// (At the time of writing, we are in this
// arm iff we are handling `ripgrep --json`
// output.)
- grep_line.code = paint::expand_tabs(
- grep_line.code.graphemes(true),
- self.config.tab_width,
- )
- .into();
+ grep_line.code =
+ tabs::expand(&grep_line.code, &self.config.tab_cfg).into();
make_style_sections(
&grep_line.code,
submatches,
@@ -158,10 +154,8 @@ impl<'a> StateMachine<'a> {
// enough. But at the point it is guaranteed
// that this handler is going to handle this
// line, so mutating it is acceptable.
- self.raw_line = expand_tabs(
- self.raw_line.graphemes(true),
- self.config.tab_width,
- );
+ self.raw_line =
+ tabs::expand(&self.raw_line, &self.config.tab_cfg);
get_code_style_sections(
&self.raw_line,
self.config.grep_match_word_style,
diff --git a/src/handlers/hunk.rs b/src/handlers/hunk.rs
index b5c777c2..30e87ebe 100644
--- a/src/handlers/hunk.rs
+++ b/src/handlers/hunk.rs
@@ -5,10 +5,10 @@ use lazy_static::lazy_static;
use crate::cli;
use crate::config::{delta_unreachable, Config};
use crate::delta::{DiffType, InMergeConflict, MergeParents, State, StateMachine};
-use crate::paint::{expand_tabs, prepare, prepare_raw_line};
+use crate::paint::{prepare, prepare_raw_line};
use crate::style;
use crate::utils::process::{self, CallingProcess};
-use unicode_segmentation::UnicodeSegmentation;
+use crate::utils::tabs;
// HACK: WordDiff should probably be a distinct top-level line state
pub fn is_word_diff() -> bool {
@@ -118,10 +118,9 @@ impl<'a> StateMachine<'a> {
// is not a hunk line, but the parser does not have a more accurate state corresponding
// to this.
self.painter.paint_buffered_minus_and_plus_lines();
- self.painter.output_buffer.push_str(&expand_tabs(
- self.raw_line.graphemes(true),
- self.config.tab_width,
- ));
+ self.painter
+ .output_buffer
+ .push_str(&tabs::expand(&self.raw_line, &self.config.tab_cfg));
self.painter.output_buffer.push('\n');
State::HunkZero(Unified, None)
}
diff --git a/src/paint.rs b/src/paint.rs
index ca35aa54..a38fa946 100644
--- a/src/paint.rs
+++ b/src/paint.rs
@@ -7,7 +7,6 @@ use itertools::Itertools;
use syntect::easy::HighlightLines;
use syntect::highlighting::Style as SyntectStyle;
use syntect::parsing::{SyntaxReference, SyntaxSet};
-use unicode_segmentation::UnicodeSegmentation;
use crate::config::{self, delta_unreachable, Config};
use crate::delta::{DiffType, InMergeConflict, MergeParents, State};
@@ -20,7 +19,7 @@ use crate::minusplus::*;
use crate::paint::superimpose_style_sections::superimpose_style_sections;
use crate::style::Style;
use crate::{ansi, style};
-use crate::{edits, utils};
+use crate::{edits, utils, utils::tabs};
pub type LineSections<'a, S> = Vec<(S, &'a str)>;
@@ -261,10 +260,7 @@ impl<'p> Painter<'p> {
state: State,
background_color_extends_to_terminal_width: BgShouldFill,
) {
- let lines = vec![(
- expand_tabs(line.graphemes(true), self.config.tab_width),
- state,
- )];
+ let lines = vec![(tabs::expand(line, &self.config.tab_cfg), state)];
let syntax_style_sections =
get_syntax_style_sections_for_lines(&lines, self.highlighter.as_mut(), self.config);
let diff_style_sections = match style_sections {
@@ -561,8 +557,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str
// The prefix contains -/+/space characters, added by git. We removes them now so they
// are not present during syntax highlighting or wrapping. If --keep-plus-minus-markers
// is in effect the prefix is re-inserted in Painter::paint_line.
- let line = line.graphemes(true).skip(prefix_length);
- format!("{}\n", expand_tabs(line, config.tab_width))
+ let mut line = tabs::remove_prefix_and_expand(prefix_length, line, &config.tab_cfg);
+ line.push('\n');
+ line
} else {
"\n".to_string()
}
@@ -571,28 +568,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str
// Remove initial -/+ characters, expand tabs as spaces, retaining ANSI sequences. Terminate with
// newline character.
pub fn prepare_raw_line(raw_line: &str, prefix_length: usize, config: &config::Config) -> String {
- format!(
- "{}\n",
- ansi::ansi_preserving_slice(
- &expand_tabs(raw_line.graphemes(true), config.tab_width),
- prefix_length
- ),
- )
-}
-
-/// Expand tabs as spaces.
-/// tab_width = 0 is documented to mean do not replace tabs.
-pub fn expand_tabs<'a, I>(line: I, tab_width: usize) -> String
-where
- I: Iterator<Item = &'a str>,
-{
- if tab_width > 0 {
- let tab_replacement = " ".repeat(tab_width);
- line.map(|s| if s == "\t" { &tab_replacement } else { s })
- .collect::<String>()
- } else {
- line.collect::<String>()
- }
+ let mut line = tabs::expand(raw_line, &config.tab_cfg);
+ line.push('\n');
+ ansi::ansi_preserving_slice(&line, prefix_length)
}
pub fn paint_minus_and_plus_lines(
diff --git a/src/subcommands/show_config.rs b/src/subcommands/show_config.rs
index 553f140a..3dba4110 100644
--- a/src/subcommands/show_config.rs
+++ b/src/subcommands/show_config.rs
@@ -158,7 +158,7 @@ pub fn show_config(config: &config::Config, writer: &mut dyn Write) -> std::io::
cli::Width::Fixed(width) => width.to_string(),
cli::Width::Variable => "variable".to_string(),
},
- tab_width = config.tab_width,
+ tab_width = config.tab_cfg.width(),
tokenization_regex = format_option_value(config.tokenization_regex.to_string()),
)?;
Ok(())
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 257c7b03..fa8427b6 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -5,4 +5,5 @@ pub mod process;
pub mod regex_replacement;
pub mod round_char_boundary;
pub mod syntect;
+pub mod tabs;
pub mod workarounds;
diff --git a/src/utils/tabs.rs b/src/utils/tabs.rs
new file mode 100644
index 00000000..67eab32b
--- /dev/null
+++ b/src/utils/tabs.rs
@@ -0,0 +1,64 @@
+use unicode_segmentation::UnicodeSegmentation;
+
+#[derive(Debug, Clone)]
+pub struct TabCfg {
+ replacement: String,
+}
+
+impl TabCfg {
+ pub fn new(width: usize) -> Self {
+ TabCfg {
+ replacement: " ".repeat(width),
+ }
+ }
+ pub fn width(&self) -> usize {
+ self.replacement.len()
+ }
+ pub fn replace(&self) -> bool {
+ !self.replacement.is_empty()
+ }
+}
+
+/// Expand tabs as spaces.
+pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
+ if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
+ itertools::join(line.split('\t'), &tab_cfg.replacement)
+ } else {
+ line.to_string()
+ }
+}
+
+/// Remove `prefix` chars from `line`, then call `tabs::expand()`.
+pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
+ let line_bytes = line.as_bytes();
+ // The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for
+ // which grapheme clusters are not required.
+ if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() {
+ // Safety: slicing into the utf-8 line-str is ok, upto `prefix` only ascii was present.
+ expand(&line[prefix..], tab_cfg)
+ } else {
+ let cut_line = line.graphemes(true).skip(prefix).collect::<String>();
+ expand(&cut_line, tab_cfg)
+ }
+}
+
+#[cfg(test)]
+pub mod tests {
+ use super::*;
+
+ #[test]
+ fn test_remove_prefix_and_expand() {
+ let line = "+-foo\tbar";
+ let result = remove_prefix_and_expand(2, line, &TabCfg::new(3));
+ assert_eq!(result, "foo bar");
+ let result = remove_prefix_and_expand(2, line, &TabCfg::new(0));
+ assert_eq!(result, "foo\tbar");
+
+ let utf8_prefix = "-│-foo\tbar";
+ let n = 3;
+ let result = remove_prefix_and_expand(n, utf8_prefix, &TabCfg::new(1));
+ assert_eq!(result, "foo bar");
+ // ensure non-ascii chars were removed:
+ assert!(utf8_prefix.len() - result.len() > n);
+ }
+}