use std::io;
use std::path::Path;
use std::process::Command;
use std::str;
use std::str::Utf8Error;

/// One entry of `git ls-tree` output, borrowing from the raw output buffer.
///
/// The entry's mode and type fields are not inspected, so any well-formed
/// record (not only those whose type is `blob`) is represented by this struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Blob<'a> {
    /// Object name exactly as printed by git (the text before the TAB).
    pub object: &'a str,
    /// Path of the entry exactly as printed by git (the text after the TAB).
    pub path: &'a Path,
}

/// Runs `git ls-tree -zr HEAD` in `path` and returns its raw standard output.
///
/// The `-z` flag makes git terminate every record with a NUL byte and print
/// paths verbatim, which is the format [`parse_ls_tree_output`] expects.
///
/// # Errors
///
/// Returns the underlying I/O error if the `git` process cannot be spawned, or
/// an [`io::ErrorKind::Other`] error (carrying the exit status and git's
/// stderr) if the command exits unsuccessfully.
pub fn crawl_git_tree<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
    let output = Command::new("git")
        .current_dir(path)
        .args(&["ls-tree", "-zr", "HEAD"])
        .output()?;

    if !output.status.success() {
        // Surface git's own diagnostics; the bare message alone gives the
        // caller nothing to act on (not a repository, no HEAD yet, ...).
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!(
                "git ls-tree did not run successfully ({}): {}",
                output.status,
                stderr.trim()
            ),
        ));
    }

    Ok(output.stdout)
}

/// Parses the NUL-separated output of `git ls-tree -z` into [`Blob`]s.
///
/// Each record has the shape `<mode> SP <type> SP <object> TAB <path>`.
/// Records that do not have this shape (including the empty record that
/// follows the final NUL terminator) are skipped.
///
/// # Errors
///
/// Returns a [`Utf8Error`] if any record, well-formed or not, is not valid
/// UTF-8.
pub fn parse_ls_tree_output<'a>(output: &'a [u8]) -> Result<Vec<Blob<'a>>, Utf8Error> {
    let mut blobs = Vec::new();

    for record in output.split(|&byte| byte == 0) {
        let record = str::from_utf8(record)?;
        if let Some(blob) = parse_record(record) {
            blobs.push(blob);
        }
    }

    Ok(blobs)
}

/// Splits a single `ls-tree` record into its object name and path.
///
/// Returns `None` when a separator is missing or any of the four fields is
/// empty.
fn parse_record(record: &str) -> Option<Blob<'_>> {
    let (mode, rest) = record.split_once(' ')?;
    let (kind, rest) = rest.split_once(' ')?;
    // Split on the FIRST tab only: everything after it is the path, which may
    // itself contain tabs or newlines because `-z` disables path quoting.
    let (object, path) = rest.split_once('\t')?;

    if mode.is_empty() || kind.is_empty() || object.is_empty() || path.is_empty() {
        return None;
    }

    Some(Blob {
        object,
        path: Path::new(path),
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    const TEST_OUTPUT: &[u8] = b"100644 blob 424965736fa85c814e0ecb12d42ec81021304ce9\tREADME.md\0invalid\0100644 blob 5ec8f34bce9b5b53f3d809591914fa1601534a8a\tsrc/main.rs\0";

    #[test]
    fn test_parse_ls_tree_output() {
        let expected = vec![
            Blob {
                object: "424965736fa85c814e0ecb12d42ec81021304ce9",
                path: Path::new("README.md"),
            },
            Blob {
                object: "5ec8f34bce9b5b53f3d809591914fa1601534a8a",
                path: Path::new("src/main.rs"),
            },
        ];

        assert_eq!(parse_ls_tree_output(TEST_OUTPUT).unwrap(), expected);
    }
}