summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Hodel <drbrain@segment7.net> 2024-02-26 04:07:59 -0800
committer: GitHub <noreply@github.com> 2024-02-26 12:07:59 +0000
commit: a3743f846b87850474518b20fdfd362468503a00 (patch)
tree: a0dc2c7f6916eb33e13415b9e54beccd79f29f33
parent: 43a1d3a862ad486a27287b178c572ccab0f0e46c (diff)
fix(stats): Enable multiple command stats to be shown using unicode_segmentation (#1739)
* Enable multiple command stats to be shown

  Add improved pipe splitting
  Clean up split logic
  Remove unnecessary lifetime annotations
  Add per-column command padding

* Add failing test case

* Update #1054 to use unicode_segmentation

  This addresses feedback in PR #1054

  Closes #1054

* Address cargo clippy, fmt

---------

Co-authored-by: Sorenson Stallings <contact@sorenson.dev>
Co-authored-by: Ellie Huxtable <ellie@elliehuxtable.com>
-rw-r--r-- Cargo.lock | 5
-rw-r--r-- atuin/Cargo.toml | 1
-rw-r--r-- atuin/src/command/client/stats.rs | 144
3 files changed, 137 insertions, 13 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 608d5fc5..0f543bc7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -217,6 +217,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"tracing-tree",
+ "unicode-segmentation",
"unicode-width",
"uuid",
"whoami",
@@ -3937,9 +3938,9 @@ dependencies = [
[[package]]
name = "unicode-segmentation"
-version = "1.10.1"
+version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
+checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"
diff --git a/atuin/Cargo.toml b/atuin/Cargo.toml
index 6ce997d6..4d04c67e 100644
--- a/atuin/Cargo.toml
+++ b/atuin/Cargo.toml
@@ -78,6 +78,7 @@ ratatui = "0.25"
tracing = "0.1"
cli-clipboard = { version = "0.4.0", optional = true }
uuid = { workspace = true }
+unicode-segmentation = "1.11.0"
[dependencies.tracing-subscriber]
diff --git a/atuin/src/command/client/stats.rs b/atuin/src/command/client/stats.rs
index c2636536..7f2e7aa8 100644
--- a/atuin/src/command/client/stats.rs
+++ b/atuin/src/command/client/stats.rs
@@ -1,6 +1,5 @@
use std::collections::{HashMap, HashSet};
-use atuin_common::utils::Escapable as _;
use clap::Parser;
use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor};
use eyre::Result;
@@ -12,6 +11,7 @@ use atuin_client::{
settings::Settings,
};
use time::{Duration, OffsetDateTime, Time};
+use unicode_segmentation::UnicodeSegmentation;
#[derive(Parser, Debug)]
#[command(infer_subcommands = true)]
@@ -22,12 +22,60 @@ pub struct Cmd {
/// How many top commands to list
#[arg(long, short, default_value = "10")]
count: usize,
+
+ /// The number of consecutive commands to consider
+ #[arg(long, short, default_value = "1")]
+ ngram_size: usize,
}
-fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usize, usize) {
+fn split_at_pipe(command: &str) -> Vec<&str> {
+ let mut result = vec![];
+ let mut quoted = false;
+ let mut start = 0;
+ let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true);
+
+ while let Some((i, c)) = graphemes.next() {
+ let current = i;
+ match c {
+ "\"" => {
+ if command[start..current] != *"\"" {
+ quoted = !quoted;
+ }
+ }
+ "'" => {
+ if command[start..current] != *"'" {
+ quoted = !quoted;
+ }
+ }
+ "\\" => if graphemes.next().is_some() {},
+ "|" => {
+ if !quoted {
+ if command[start..].starts_with('|') {
+ start += 1;
+ }
+ result.push(&command[start..current]);
+ start = current;
+ }
+ }
+ _ => {}
+ }
+ }
+ if command[start..].starts_with('|') {
+ start += 1;
+ }
+ result.push(&command[start..]);
+ result
+}
+
+fn compute_stats(
+ settings: &Settings,
+ history: &[History],
+ count: usize,
+ ngram_size: usize,
+) -> (usize, usize) {
let mut commands = HashSet::<&str>::with_capacity(history.len());
- let mut prefixes = HashMap::<&str, usize>::with_capacity(history.len());
let mut total_unignored = 0;
+ let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len());
for i in history {
// just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
let command = i.command.trim();
@@ -39,7 +87,21 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
total_unignored += 1;
commands.insert(command);
- *prefixes.entry(prefix).or_default() += 1;
+
+ split_at_pipe(i.command.trim())
+ .iter()
+ .map(|l| {
+ let command = l.trim();
+ commands.insert(command);
+ command
+ })
+ .collect::<Vec<_>>()
+ .windows(ngram_size)
+ .for_each(|w| {
+ *prefixes
+ .entry(w.iter().map(|c| interesting_command(settings, c)).collect())
+ .or_default() += 1;
+ });
}
let unique = commands.len();
@@ -54,6 +116,17 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
let max = top.iter().map(|x| x.1).max().unwrap();
let num_pad = max.ilog10() as usize + 1;
+ // Find the length of the longest command name for each column
+ let column_widths = top
+ .iter()
+ .map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>())
+ .fold(vec![0; ngram_size], |acc, item| {
+ acc.iter()
+ .zip(item.iter())
+ .map(|(a, i)| *std::cmp::max(a, i))
+ .collect()
+ });
+
for (command, count) in top {
let gray = SetForegroundColor(Color::Grey);
let bold = SetAttribute(crossterm::style::Attribute::Bold);
@@ -74,10 +147,14 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
print!(" ");
}
- println!(
- "{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{}{ResetColor}",
- command.escape_control()
- );
+ let formatted_command = command
+ .iter()
+ .zip(column_widths.iter())
+ .map(|(cmd, width)| format!("{cmd:width$}"))
+ .collect::<Vec<_>>()
+ .join(" | ");
+
+ println!("{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}");
}
println!("Total commands: {total_unignored}");
println!("Unique commands: {unique}");
@@ -120,7 +197,7 @@ impl Cmd {
let end = start + Duration::days(1);
db.range(start, end).await?
};
- compute_stats(settings, &history, self.count);
+ compute_stats(settings, &history, self.count, self.ngram_size);
Ok(())
}
}
@@ -189,7 +266,7 @@ mod tests {
use time::OffsetDateTime;
use super::compute_stats;
- use super::interesting_command;
+ use super::{interesting_command, split_at_pipe};
#[test]
fn ignored_commands() {
@@ -209,7 +286,7 @@ mod tests {
.into(),
];
- let (total, unique) = compute_stats(&settings, &history, 10);
+ let (total, unique) = compute_stats(&settings, &history, 10, 1);
assert_eq!(total, 1);
assert_eq!(unique, 1);
}
@@ -312,4 +389,49 @@ mod tests {
"cargo build foo"
);
}
+
+ #[test]
+ fn split_simple() {
+ assert_eq!(split_at_pipe("fd | rg"), ["fd ", " rg"]);
+ }
+
+ #[test]
+ fn split_multi() {
+ assert_eq!(
+ split_at_pipe("kubectl | jq | rg"),
+ ["kubectl ", " jq ", " rg"]
+ );
+ }
+
+ #[test]
+ fn split_simple_quoted() {
+ assert_eq!(
+ split_at_pipe("foo | bar 'baz {} | quux' | xyzzy"),
+ ["foo ", " bar 'baz {} | quux' ", " xyzzy"]
+ );
+ }
+
+ #[test]
+ fn split_multi_quoted() {
+ assert_eq!(
+ split_at_pipe("foo | bar 'baz \"{}\" | quux' | xyzzy"),
+ ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"]
+ );
+ }
+
+ #[test]
+ fn escaped_pipes() {
+ assert_eq!(
+ split_at_pipe("foo | bar baz \\| quux"),
+ ["foo ", " bar baz \\| quux"]
+ );
+ }
+
+ #[test]
+ fn emoji() {
+ assert_eq!(
+ split_at_pipe("git commit -m \"🚀\""),
+ ["git commit -m \"🚀\""]
+ );
+ }
}