From 139cdb9656292edba917fd6addc0a7960cf60342 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Wed, 31 May 2023 19:17:18 +0200 Subject: Misc tab refactoring (#1424) * Move tabs logic into utils * Re-use buffer returned by tabs::expand * Add TabCfg to configure tabs Use the String from this config for the tab replacement. This avoids creating a new String for each processed line. * Avoid unicode segmentation for each line just to remove a prefix In some code paths no prefix is removed, and in almost all other cases the prefix is just ascii. This simplifies a lot of calls. * Set default tab with to 8 Editors like vim, emacs, nano and most terminal emulators set this value as the default tab display width. --- src/cli.rs | 2 +- src/config.rs | 4 +-- src/handlers/grep.rs | 18 ++++-------- src/handlers/hunk.rs | 11 ++++---- src/paint.rs | 38 ++++++------------------- src/subcommands/show_config.rs | 2 +- src/utils/mod.rs | 1 + src/utils/tabs.rs | 64 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 88 insertions(+), 52 deletions(-) create mode 100644 src/utils/tabs.rs diff --git a/src/cli.rs b/src/cli.rs index eb1f9b85..80c73b90 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -935,7 +935,7 @@ pub struct Opt { /// syntax highlighting. pub syntax_theme: Option, - #[arg(long = "tabs", default_value = "4", value_name = "N")] + #[arg(long = "tabs", default_value = "8", value_name = "N")] /// The number of spaces to replace tab characters with. /// /// Use --tabs=0 to pass tab characters through directly, but note that in that case delta will diff --git a/src/config.rs b/src/config.rs index 10ab0eca..3d411ea4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -120,7 +120,7 @@ pub struct Config { pub syntax_dummy_theme: SyntaxTheme, pub syntax_set: SyntaxSet, pub syntax_theme: Option, - pub tab_width: usize, + pub tab_cfg: utils::tabs::TabCfg, pub tokenization_regex: Regex, pub true_color: bool, pub truncation_symbol: String, @@ -357,7 +357,7 @@ impl From for Config { syntax_dummy_theme: SyntaxTheme::default(), syntax_set: opt.computed.syntax_set, syntax_theme: opt.computed.syntax_theme, - tab_width: opt.tab_width, + tab_cfg: utils::tabs::TabCfg::new(opt.tab_width), tokenization_regex, true_color: opt.computed.true_color, truncation_symbol: format!("{}→{}", ansi::ANSI_SGR_REVERSE, ansi::ANSI_SGR_RESET), diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs index 2356f9e8..89109a69 100644 --- a/src/handlers/grep.rs +++ b/src/handlers/grep.rs @@ -3,14 +3,13 @@ use std::borrow::Cow; use lazy_static::lazy_static; use regex::Regex; use serde::Deserialize; -use unicode_segmentation::UnicodeSegmentation; use crate::ansi; use crate::delta::{State, StateMachine}; use crate::handlers::{self, ripgrep_json}; -use crate::paint::{self, expand_tabs, BgShouldFill, StyleSectionSpecifier}; +use crate::paint::{self, BgShouldFill, StyleSectionSpecifier}; use crate::style::Style; -use crate::utils::process; +use crate::utils::{process, tabs}; #[derive(Debug, PartialEq, Eq)] pub struct GrepLine<'b> { @@ -139,11 +138,8 @@ impl<'a> StateMachine<'a> { // (At the time of writing, we are in this // arm iff we are handling `ripgrep --json` // output.) - grep_line.code = paint::expand_tabs( - grep_line.code.graphemes(true), - self.config.tab_width, - ) - .into(); + grep_line.code = + tabs::expand(&grep_line.code, &self.config.tab_cfg).into(); make_style_sections( &grep_line.code, submatches, @@ -158,10 +154,8 @@ impl<'a> StateMachine<'a> { // enough. But at the point it is guaranteed // that this handler is going to handle this // line, so mutating it is acceptable. - self.raw_line = expand_tabs( - self.raw_line.graphemes(true), - self.config.tab_width, - ); + self.raw_line = + tabs::expand(&self.raw_line, &self.config.tab_cfg); get_code_style_sections( &self.raw_line, self.config.grep_match_word_style, diff --git a/src/handlers/hunk.rs b/src/handlers/hunk.rs index b5c777c2..30e87ebe 100644 --- a/src/handlers/hunk.rs +++ b/src/handlers/hunk.rs @@ -5,10 +5,10 @@ use lazy_static::lazy_static; use crate::cli; use crate::config::{delta_unreachable, Config}; use crate::delta::{DiffType, InMergeConflict, MergeParents, State, StateMachine}; -use crate::paint::{expand_tabs, prepare, prepare_raw_line}; +use crate::paint::{prepare, prepare_raw_line}; use crate::style; use crate::utils::process::{self, CallingProcess}; -use unicode_segmentation::UnicodeSegmentation; +use crate::utils::tabs; // HACK: WordDiff should probably be a distinct top-level line state pub fn is_word_diff() -> bool { @@ -118,10 +118,9 @@ impl<'a> StateMachine<'a> { // is not a hunk line, but the parser does not have a more accurate state corresponding // to this. self.painter.paint_buffered_minus_and_plus_lines(); - self.painter.output_buffer.push_str(&expand_tabs( - self.raw_line.graphemes(true), - self.config.tab_width, - )); + self.painter + .output_buffer + .push_str(&tabs::expand(&self.raw_line, &self.config.tab_cfg)); self.painter.output_buffer.push('\n'); State::HunkZero(Unified, None) } diff --git a/src/paint.rs b/src/paint.rs index ca35aa54..a38fa946 100644 --- a/src/paint.rs +++ b/src/paint.rs @@ -7,7 +7,6 @@ use itertools::Itertools; use syntect::easy::HighlightLines; use syntect::highlighting::Style as SyntectStyle; use syntect::parsing::{SyntaxReference, SyntaxSet}; -use unicode_segmentation::UnicodeSegmentation; use crate::config::{self, delta_unreachable, Config}; use crate::delta::{DiffType, InMergeConflict, MergeParents, State}; @@ -20,7 +19,7 @@ use crate::minusplus::*; use crate::paint::superimpose_style_sections::superimpose_style_sections; use crate::style::Style; use crate::{ansi, style}; -use crate::{edits, utils}; +use crate::{edits, utils, utils::tabs}; pub type LineSections<'a, S> = Vec<(S, &'a str)>; @@ -261,10 +260,7 @@ impl<'p> Painter<'p> { state: State, background_color_extends_to_terminal_width: BgShouldFill, ) { - let lines = vec![( - expand_tabs(line.graphemes(true), self.config.tab_width), - state, - )]; + let lines = vec![(tabs::expand(line, &self.config.tab_cfg), state)]; let syntax_style_sections = get_syntax_style_sections_for_lines(&lines, self.highlighter.as_mut(), self.config); let diff_style_sections = match style_sections { @@ -561,8 +557,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str // The prefix contains -/+/space characters, added by git. We removes them now so they // are not present during syntax highlighting or wrapping. If --keep-plus-minus-markers // is in effect the prefix is re-inserted in Painter::paint_line. - let line = line.graphemes(true).skip(prefix_length); - format!("{}\n", expand_tabs(line, config.tab_width)) + let mut line = tabs::remove_prefix_and_expand(prefix_length, line, &config.tab_cfg); + line.push('\n'); + line } else { "\n".to_string() } @@ -571,28 +568,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str // Remove initial -/+ characters, expand tabs as spaces, retaining ANSI sequences. Terminate with // newline character. pub fn prepare_raw_line(raw_line: &str, prefix_length: usize, config: &config::Config) -> String { - format!( - "{}\n", - ansi::ansi_preserving_slice( - &expand_tabs(raw_line.graphemes(true), config.tab_width), - prefix_length - ), - ) -} - -/// Expand tabs as spaces. -/// tab_width = 0 is documented to mean do not replace tabs. -pub fn expand_tabs<'a, I>(line: I, tab_width: usize) -> String -where - I: Iterator, -{ - if tab_width > 0 { - let tab_replacement = " ".repeat(tab_width); - line.map(|s| if s == "\t" { &tab_replacement } else { s }) - .collect::() - } else { - line.collect::() - } + let mut line = tabs::expand(raw_line, &config.tab_cfg); + line.push('\n'); + ansi::ansi_preserving_slice(&line, prefix_length) } pub fn paint_minus_and_plus_lines( diff --git a/src/subcommands/show_config.rs b/src/subcommands/show_config.rs index 553f140a..3dba4110 100644 --- a/src/subcommands/show_config.rs +++ b/src/subcommands/show_config.rs @@ -158,7 +158,7 @@ pub fn show_config(config: &config::Config, writer: &mut dyn Write) -> std::io:: cli::Width::Fixed(width) => width.to_string(), cli::Width::Variable => "variable".to_string(), }, - tab_width = config.tab_width, + tab_width = config.tab_cfg.width(), tokenization_regex = format_option_value(config.tokenization_regex.to_string()), )?; Ok(()) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 257c7b03..fa8427b6 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -5,4 +5,5 @@ pub mod process; pub mod regex_replacement; pub mod round_char_boundary; pub mod syntect; +pub mod tabs; pub mod workarounds; diff --git a/src/utils/tabs.rs b/src/utils/tabs.rs new file mode 100644 index 00000000..67eab32b --- /dev/null +++ b/src/utils/tabs.rs @@ -0,0 +1,64 @@ +use unicode_segmentation::UnicodeSegmentation; + +#[derive(Debug, Clone)] +pub struct TabCfg { + replacement: String, +} + +impl TabCfg { + pub fn new(width: usize) -> Self { + TabCfg { + replacement: " ".repeat(width), + } + } + pub fn width(&self) -> usize { + self.replacement.len() + } + pub fn replace(&self) -> bool { + !self.replacement.is_empty() + } +} + +/// Expand tabs as spaces. +pub fn expand(line: &str, tab_cfg: &TabCfg) -> String { + if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') { + itertools::join(line.split('\t'), &tab_cfg.replacement) + } else { + line.to_string() + } +} + +/// Remove `prefix` chars from `line`, then call `tabs::expand()`. +pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String { + let line_bytes = line.as_bytes(); + // The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for + // which grapheme clusters are not required. + if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() { + // Safety: slicing into the utf-8 line-str is ok, upto `prefix` only ascii was present. + expand(&line[prefix..], tab_cfg) + } else { + let cut_line = line.graphemes(true).skip(prefix).collect::(); + expand(&cut_line, tab_cfg) + } +} + +#[cfg(test)] +pub mod tests { + use super::*; + + #[test] + fn test_remove_prefix_and_expand() { + let line = "+-foo\tbar"; + let result = remove_prefix_and_expand(2, line, &TabCfg::new(3)); + assert_eq!(result, "foo bar"); + let result = remove_prefix_and_expand(2, line, &TabCfg::new(0)); + assert_eq!(result, "foo\tbar"); + + let utf8_prefix = "-│-foo\tbar"; + let n = 3; + let result = remove_prefix_and_expand(n, utf8_prefix, &TabCfg::new(1)); + assert_eq!(result, "foo bar"); + // ensure non-ascii chars were removed: + assert!(utf8_prefix.len() - result.len() > n); + } +} -- cgit v1.2.3