From 056a0728c775e37460ed00791ad503e03a88f3d6 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Sat, 17 Apr 2021 14:37:46 +0200 Subject: Initial import Signed-off-by: Matthias Beyer --- src/main.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/main.rs (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..8fd5fb2 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,107 @@ +use std::io::Write; +use std::path::PathBuf; + +use anyhow::Context; +use anyhow::Error; +use anyhow::Result; +use anyhow::anyhow; +use itertools::Itertools; + +mod config; +mod cli; +mod schema; + +fn main() -> Result<()> { + let cli = crate::cli::app(); + let _ = env_logger::try_init()?; + let mut config = ::config::Config::default(); + { + let xdg = xdg::BaseDirectories::with_prefix("fss")?; + let xdg_config = xdg.find_config_file("config.toml") + .ok_or_else(|| anyhow!("No configuration file found with XDG: {}", xdg.get_config_home().display()))?; + + log::debug!("Configuration file found with XDG: {}", xdg_config.display()); + config.merge(::config::File::from(xdg_config).required(false)) + .context("Failed to load config.toml from XDG configuration directory")?; + } + let config = config.try_into::()?; + + let index_path = tantivy::directory::MmapDirectory::open(config.database_path())?; + let schema = crate::schema::schema(); + + let index = tantivy::Index::open_or_create(index_path, schema.clone())?; + + let field_path = schema.get_field("path") + .ok_or_else(|| anyhow!("BUG"))?; + let field_ft = schema.get_field("ft") + .ok_or_else(|| anyhow!("BUG"))?; + let field_body = schema.get_field("body") + .ok_or_else(|| anyhow!("BUG"))?; + + match cli.get_matches().subcommand() { + ("index", Some(mtch)) => { + let mut index_writer = index.writer(50_000_000)?; + mtch.values_of("file") + .unwrap() // safe by clap + .map(|filepath| { + let path_str = String::from(filepath); + let path = PathBuf::from(&path_str); + + let filetype = path.extension() + .map(ToOwned::to_owned) + .and_then(|osstr| osstr.to_str().map(|s| s.to_string())) + .ok_or_else(|| anyhow!("Path {} is not UTF8", filepath))?; + + let mut doc = tantivy::Document::default(); + doc.add_text(field_path, &path_str); + doc.add_text(field_ft, &filetype); + + doc.add_text(field_body, std::fs::read_to_string(path)?); + + index_writer.add_document(doc); + Ok(()) + }) + .collect::>>()?; + + index_writer.commit()?; + Ok(()) + }, + + ("search", Some(mtch)) => { + let query_str = mtch.values_of("term") + .unwrap() // safe by clap + .join(" "); + + let reader = index + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::OnCommit) + .try_into()?; + + let searcher = reader.searcher(); + let query_parser = tantivy::query::QueryParser::for_index(&index, vec![field_path.clone(), field_ft, field_body]); + let query = query_parser.parse_query(&query_str)?; + + let top_docs = searcher.search(&query, &tantivy::collector::TopDocs::with_limit(10))?; + let mut output = std::io::stdout(); + + top_docs.into_iter() + .map(|(_score, adr)| { + let retrieved_doc = searcher.doc(adr)?; + retrieved_doc.get_all(field_path) + .map(|value| { + value.text().ok_or_else(|| anyhow!("Not a text value..")) + }) + .map_ok(|txt| { + writeln!(output, "{}", txt).map_err(Error::from) + }) + .collect::>>() + }) + .collect::>>>() + .map(|_| ()) + }, + + (_other, _) => { + unimplemented!() + }, + } +} -- cgit v1.2.3