From 48b4e347766c378658f848e846192010fb3c92ee Mon Sep 17 00:00:00 2001 From: Ellie Huxtable Date: Tue, 2 Apr 2024 12:13:00 +0100 Subject: feat(search): add better search scoring (#1885) * feat(search): add better search scoring * add opt-in --- atuin-client/src/settings.rs | 2 ++ atuin/src/command/client/search.rs | 1 + atuin/src/command/client/search/interactive.rs | 17 +++++++--- atuin/src/command/client/search/sort.rs | 46 ++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 atuin/src/command/client/search/sort.rs diff --git a/atuin-client/src/settings.rs b/atuin-client/src/settings.rs index 487e61b7..97a0b4b7 100644 --- a/atuin-client/src/settings.rs +++ b/atuin-client/src/settings.rs @@ -385,6 +385,7 @@ pub struct Settings { pub network_timeout: u64, pub local_timeout: f64, pub enter_accept: bool, + pub smart_sort: bool, #[serde(default)] pub stats: Stats, @@ -631,6 +632,7 @@ impl Settings { .set_default("keymap_mode", "emacs")? .set_default("keymap_mode_shell", "auto")? .set_default("keymap_cursor", HashMap::::new())? + .set_default("smart_sort", false)? 
.set_default( "prefers_reduced_motion", std::env::var("NO_MOTION") diff --git a/atuin/src/command/client/search.rs b/atuin/src/command/client/search.rs index f3626afe..f645d26b 100644 --- a/atuin/src/command/client/search.rs +++ b/atuin/src/command/client/search.rs @@ -21,6 +21,7 @@ mod engines; mod history_list; mod inspector; mod interactive; +mod sort; pub use duration::format_duration_into; diff --git a/atuin/src/command/client/search/interactive.rs b/atuin/src/command/client/search/interactive.rs index f6abb00d..6c2c0447 100644 --- a/atuin/src/command/client/search/interactive.rs +++ b/atuin/src/command/client/search/interactive.rs @@ -29,6 +29,7 @@ use super::{ cursor::Cursor, engines::{SearchEngine, SearchState}, history_list::{HistoryList, ListState, PREFIX_LENGTH}, + sort, }; use crate::{command::client::search::engines, VERSION}; @@ -82,13 +83,21 @@ struct StyleState { } impl State { - async fn query_results(&mut self, db: &mut dyn Database) -> Result<Vec<History>> { + async fn query_results( + &mut self, + db: &mut dyn Database, + smart_sort: bool, + ) -> Result<Vec<History>> { let results = self.engine.query(&self.search, db).await?; self.results_state.select(0); self.results_len = results.len(); - Ok(results) + if smart_sort { + Ok(sort::sort(self.search.input.as_str(), results)) + } else { + Ok(results) + } } fn handle_input( @@ -1003,7 +1012,7 @@ pub async fn history( app.initialize_keymap_cursor(settings); - let mut results = app.query_results(&mut db).await?; + let mut results = app.query_results(&mut db, settings.smart_sort).await?; let mut stats: Option = None; let accept; @@ -1064,7 +1073,7 @@ pub async fn history( || initial_filter_mode != app.search.filter_mode || initial_search_mode != app.search_mode { - results = app.query_results(&mut db).await?; + results = app.query_results(&mut db, settings.smart_sort).await?; } stats = if app.tab_index == 0 { diff --git a/atuin/src/command/client/search/sort.rs b/atuin/src/command/client/search/sort.rs new file mode 100644 
index 00000000..4465a142 --- /dev/null +++ b/atuin/src/command/client/search/sort.rs @@ -0,0 +1,46 @@ +use atuin_client::history::History; + +type ScoredHistory = (f64, History); + +// Fuzzy search already comes sorted by minspan +// This sorting should be applicable to all search modes, and solve the more "obvious" issues +// first. +// Later on, we can pass in context and do some boosts there too. +pub fn sort(query: &str, input: Vec) -> Vec { + // This can totally be extended. We need to be _careful_ that it's not slow. + // We also need to balance sorting db-side with sorting here. SQLite can do a lot, + // but some things are just much easier/more doable in Rust. + + let mut scored = input + .into_iter() + .map(|h| { + // If history is _prefixed_ with the query, score it more highly + let score = if h.command.starts_with(query) { + 2.0 + } else if h.command.contains(query) { + 1.75 + } else { + 1.0 + }; + + // calculate how long ago the history was, in seconds + let now = time::OffsetDateTime::now_utc().unix_timestamp(); + let time = h.timestamp.unix_timestamp(); + let diff = std::cmp::max(1, now - time); // no /0 please + + // prefer newer history, but not hugely so as to offset the other scoring + // the numbers will get super small over time, but I don't want time to overpower other + // scoring + #[allow(clippy::cast_precision_loss)] + let time_score = 1.0 + (1.0 / diff as f64); + let score = score * time_score; + + (score, h) + }) + .collect::>(); + + scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap().reverse()); + + // Remove the scores and return the history + scored.into_iter().map(|(_, h)| h).collect::>() +} -- cgit v1.2.3