summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: cyqsimon <28627918+cyqsimon@users.noreply.github.com> 2023-03-02 18:05:19 +0800
committer: GitHub <noreply@github.com> 2023-03-02 10:05:19 +0000
commit: 63c572104bc6fa05e04197212fe59336a48ebb54 (patch)
tree: ac59cc7c8b31e1ce9781b0aad2276660933ecbcb
parent: a0338905064c0df7be229887aeaff59c2c3251e4 (diff)
Rework Bash import (#747)
* Rework Bash import
  Closes #745
  - Imported history is now ordered correctly
  - Timestamps (when `HISTTIMEFORMAT` is set) are handled correctly
* Timestamp tests test for strict sorting
-rw-r--r-- atuin-client/src/import/bash.rs 181
1 files changed, 146 insertions, 35 deletions
diff --git a/atuin-client/src/import/bash.rs b/atuin-client/src/import/bash.rs
index 10e8de1e..520b49c8 100644
--- a/atuin-client/src/import/bash.rs
+++ b/atuin-client/src/import/bash.rs
@@ -1,8 +1,10 @@
-use std::{fs::File, io::Read, path::PathBuf};
+use std::{fs::File, io::Read, path::PathBuf, str};
use async_trait::async_trait;
+use chrono::{DateTime, Duration, NaiveDateTime, Utc};
use directories::UserDirs;
use eyre::{eyre, Result};
+use itertools::Itertools;
use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History;
@@ -32,37 +34,54 @@ impl Importer for Bash {
}
async fn entries(&mut self) -> Result<usize> {
- Ok(super::count_lines(&self.bytes))
+ let count = unix_byte_lines(&self.bytes)
+ .map(LineType::from)
+ .filter(|line| matches!(line, LineType::Command(_)))
+ .count();
+ Ok(count)
}
async fn load(self, h: &mut impl Loader) -> Result<()> {
- let now = chrono::Utc::now();
- let mut line = String::new();
-
- for (i, b) in unix_byte_lines(&self.bytes).enumerate() {
- let s = match std::str::from_utf8(b) {
- Ok(s) => s,
- Err(_) => continue, // we can skip past things like invalid utf8
- };
-
- if let Some(s) = s.strip_suffix('\\') {
- line.push_str(s);
- line.push_str("\\\n");
- } else {
- line.push_str(s);
- let command = std::mem::take(&mut line);
-
- let offset = chrono::Duration::seconds(i as i64);
- h.push(History::new(
- now - offset, // preserve ordering
- command,
- String::from("unknown"),
- -1,
- -1,
- None,
- None,
- ))
- .await?;
+ let lines = unix_byte_lines(&self.bytes)
+ .map(LineType::from)
+ .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored
+ .collect_vec();
+
+ let (commands_before_first_timestamp, first_timestamp) = lines
+ .iter()
+ .enumerate()
+ .find_map(|(i, line)| match line {
+ LineType::Timestamp(t) => Some((i, *t)),
+ _ => None,
+ })
+ // if no known timestamps, use now as base
+ .unwrap_or((lines.len(), Utc::now()));
+
+ // if no timestamp is recorded, then use this increment to set an arbitrary timestamp
+ // to preserve ordering
+ let timestamp_increment = Duration::seconds(1);
+ // make sure there is a minimum amount of time before the first known timestamp
+ // to fit all commands, given the default increment
+ let mut next_timestamp =
+ first_timestamp - timestamp_increment * commands_before_first_timestamp as i32;
+
+ for line in lines.into_iter() {
+ match line {
+ LineType::NotUtf8 => unreachable!(), // already filtered
+ LineType::Timestamp(t) => next_timestamp = t,
+ LineType::Command(c) => {
+ let entry = History::new(
+ next_timestamp,
+ c.into(),
+ "unknown".into(),
+ -1,
+ -1,
+ None,
+ None,
+ );
+ h.push(entry).await?;
+ next_timestamp += timestamp_increment;
+ }
}
}
@@ -70,18 +89,47 @@ impl Importer for Bash {
}
}
+#[derive(Debug, Clone)]
+enum LineType<'a> {
+ NotUtf8,
+ /// A timestamp line start with a '#', followed immediately by an integer
+ /// that represents seconds since UNIX epoch.
+ Timestamp(DateTime<Utc>),
+ /// Anything that doesn't look like a timestamp.
+ Command(&'a str),
+}
+impl<'a> From<&'a [u8]> for LineType<'a> {
+ fn from(bytes: &'a [u8]) -> Self {
+ let Ok(line) = str::from_utf8(bytes) else {
+ return LineType::NotUtf8;
+ };
+ let parsed = match try_parse_line_as_timestamp(line) {
+ Some(time) => LineType::Timestamp(time),
+ None => LineType::Command(line),
+ };
+ parsed
+ }
+}
+
+fn try_parse_line_as_timestamp(line: &str) -> Option<DateTime<Utc>> {
+ let seconds = line.strip_prefix('#')?.parse().ok()?;
+ let time = NaiveDateTime::from_timestamp(seconds, 0);
+ Some(DateTime::from_utc(time, Utc))
+}
+
#[cfg(test)]
-mod tests {
- use itertools::assert_equal;
+mod test {
+ use std::cmp::Ordering;
+
+ use itertools::{assert_equal, Itertools};
use crate::import::{tests::TestLoader, Importer};
use super::Bash;
#[tokio::test]
- async fn test_parse_file() {
+ async fn parse_no_timestamps() {
let bytes = r"cargo install atuin
-cargo install atuin; \
cargo update
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
"
@@ -89,7 +137,7 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
.to_owned();
let mut bash = Bash { bytes };
- assert_eq!(bash.entries().await.unwrap(), 4);
+ assert_eq!(bash.entries().await.unwrap(), 3);
let mut loader = TestLoader::default();
bash.load(&mut loader).await.unwrap();
@@ -98,9 +146,72 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
loader.buf.iter().map(|h| h.command.as_str()),
[
"cargo install atuin",
- "cargo install atuin; \\\ncargo update",
+ "cargo update",
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
],
);
+ assert!(is_strictly_sorted(
+ loader.buf.iter().map(|h| h.timestamp.timestamp())
+ ))
+ }
+
+ #[tokio::test]
+ async fn parse_with_timestamps() {
+ let bytes = b"#1672918999
+git reset
+#1672919006
+git clean -dxf
+#1672919020
+cd ../
+"
+ .to_vec();
+
+ let mut bash = Bash { bytes };
+ assert_eq!(bash.entries().await.unwrap(), 3);
+
+ let mut loader = TestLoader::default();
+ bash.load(&mut loader).await.unwrap();
+
+ assert_equal(
+ loader.buf.iter().map(|h| h.command.as_str()),
+ ["git reset", "git clean -dxf", "cd ../"],
+ );
+ assert_equal(
+ loader.buf.iter().map(|h| h.timestamp.timestamp()),
+ [1672918999, 1672919006, 1672919020],
+ )
+ }
+
+ #[tokio::test]
+ async fn parse_with_partial_timestamps() {
+ let bytes = b"git reset
+#1672919006
+git clean -dxf
+cd ../
+"
+ .to_vec();
+
+ let mut bash = Bash { bytes };
+ assert_eq!(bash.entries().await.unwrap(), 3);
+
+ let mut loader = TestLoader::default();
+ bash.load(&mut loader).await.unwrap();
+
+ assert_equal(
+ loader.buf.iter().map(|h| h.command.as_str()),
+ ["git reset", "git clean -dxf", "cd ../"],
+ );
+ assert!(is_strictly_sorted(
+ loader.buf.iter().map(|h| h.timestamp.timestamp())
+ ))
+ }
+
+ fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool
+ where
+ T: Clone + PartialOrd,
+ {
+ iter.into_iter()
+ .tuple_windows()
+ .all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less)))
}
}