summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/cli.rs 36
-rw-r--r-- src/config.rs 8
-rw-r--r-- src/main.rs 107
-rw-r--r-- src/schema.rs 16
4 files changed, 167 insertions, 0 deletions
diff --git a/src/cli.rs b/src/cli.rs
new file mode 100644
index 0000000..9098cfe
--- /dev/null
+++ b/src/cli.rs
@@ -0,0 +1,36 @@
+use clap::App;
+use clap::Arg;
+use clap::crate_authors;
+use clap::crate_version;
+
+/// Builds the command line interface definition of `fss`.
+///
+/// The returned [`App`] knows two subcommands:
+///
+/// * `index FILE...`  - one or more files to add to the index
+/// * `search TERM...` - one or more terms to search the index for
+///
+/// A subcommand is mandatory: when none is given, clap prints the help text
+/// and exits instead of handing an empty match to the caller.
+pub fn app<'a>() -> App<'a, 'a> {
+    let index = App::new("index")
+        .version(crate_version!())
+        .about("Index a file")
+        .arg(
+            Arg::with_name("file")
+                .required(true)
+                .multiple(true)
+                .value_name("FILE")
+                .help("Index these files"),
+        );
+
+    let search = App::new("search")
+        .version(crate_version!())
+        .about("Search for a file")
+        .arg(
+            Arg::with_name("term")
+                .required(true)
+                .multiple(true)
+                .value_name("TERM")
+                .help("Search with these terms"),
+        );
+
+    App::new("fss")
+        .author(crate_authors!())
+        .version(crate_version!())
+        .about("Filesystemsearch")
+        // Without a subcommand there is nothing to do, so show the help text
+        // rather than letting the program continue with an empty match.
+        .setting(clap::AppSettings::SubcommandRequiredElseHelp)
+        .subcommand(index)
+        .subcommand(search)
+}
+
+
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 0000000..27dddb8
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,8 @@
+use std::path::PathBuf;
+
+/// Runtime configuration of `fss`, deserialized with serde.
+#[derive(Debug, serde::Deserialize, getset::Getters)]
+pub struct Config {
+    /// Filesystem path of the directory that holds the search index.
+    #[getset(get = "pub")]
+    database_path: PathBuf,
+}
+
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..8fd5fb2
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,107 @@
+use std::io::Write;
+use std::path::PathBuf;
+
+use anyhow::Context;
+use anyhow::Error;
+use anyhow::Result;
+use anyhow::anyhow;
+use itertools::Itertools;
+
+mod config;
+mod cli;
+mod schema;
+
+/// Entry point of `fss`.
+///
+/// Parses the command line, loads `config.toml` from the XDG configuration
+/// directory, opens (or creates) the tantivy index at the configured
+/// `database_path` and then runs the requested subcommand:
+///
+/// * `index`  - adds every given file (path, file extension, contents) to the
+///   index and commits once at the end.
+/// * `search` - joins the given terms into one query and prints the stored
+///   path of up to ten matching documents, one per line.
+///
+/// # Errors
+///
+/// Returns an error if the logger cannot be initialized, the configuration
+/// cannot be found or loaded, the index cannot be opened, a file cannot be
+/// read, the query cannot be parsed, writing to stdout fails, or the
+/// subcommand is unknown or missing.
+fn main() -> Result<()> {
+    // Parse the command line before touching configuration or index, so that
+    // `--help`, `--version` and usage errors work without a config file.
+    let matches = crate::cli::app().get_matches();
+    env_logger::try_init()?;
+
+    let mut config = ::config::Config::default();
+    {
+        let xdg = xdg::BaseDirectories::with_prefix("fss")?;
+        let xdg_config = xdg.find_config_file("config.toml")
+            .ok_or_else(|| anyhow!("No configuration file found with XDG: {}", xdg.get_config_home().display()))?;
+
+        log::debug!("Configuration file found with XDG: {}", xdg_config.display());
+        config.merge(::config::File::from(xdg_config).required(false))
+            .context("Failed to load config.toml from XDG configuration directory")?;
+    }
+    let config = config.try_into::<crate::config::Config>()?;
+
+    let index_path = tantivy::directory::MmapDirectory::open(config.database_path())
+        .with_context(|| format!("Failed to open index directory {}", config.database_path().display()))?;
+    let schema = crate::schema::schema();
+
+    let index = tantivy::Index::open_or_create(index_path, schema.clone())?;
+
+    // These fields are defined by `crate::schema::schema()`; not finding one
+    // of them is a programming error, not a user error.
+    let field_path = schema.get_field("path")
+        .ok_or_else(|| anyhow!("BUG"))?;
+    let field_ft = schema.get_field("ft")
+        .ok_or_else(|| anyhow!("BUG"))?;
+    let field_body = schema.get_field("body")
+        .ok_or_else(|| anyhow!("BUG"))?;
+
+    match matches.subcommand() {
+        ("index", Some(mtch)) => {
+            let mut index_writer = index.writer(50_000_000)?;
+            let files = mtch.values_of("file")
+                .unwrap(); // safe by clap
+
+            for filepath in files {
+                let path = PathBuf::from(filepath);
+
+                let mut doc = tantivy::Document::default();
+                doc.add_text(field_path, filepath);
+
+                // A file without an extension simply has no file type; it is
+                // still indexed by path and body instead of aborting the run.
+                if let Some(filetype) = path.extension().and_then(|ext| ext.to_str()) {
+                    doc.add_text(field_ft, filetype);
+                }
+
+                let body = std::fs::read_to_string(&path)
+                    .with_context(|| format!("Failed to read file {}", filepath))?;
+                doc.add_text(field_body, body);
+
+                index_writer.add_document(doc);
+            }
+
+            // Nothing becomes searchable before the commit, so a failure above
+            // leaves the index unchanged.
+            index_writer.commit()?;
+            Ok(())
+        },
+
+        ("search", Some(mtch)) => {
+            let query_str = mtch.values_of("term")
+                .unwrap() // safe by clap
+                .join(" ");
+
+            let reader = index
+                .reader_builder()
+                .reload_policy(tantivy::ReloadPolicy::OnCommit)
+                .try_into()?;
+
+            let searcher = reader.searcher();
+            let query_parser = tantivy::query::QueryParser::for_index(&index, vec![field_path, field_ft, field_body]);
+            let query = query_parser.parse_query(&query_str)?;
+
+            let top_docs = searcher.search(&query, &tantivy::collector::TopDocs::with_limit(10))?;
+
+            // Lock stdout once for the whole result list.
+            let stdout = std::io::stdout();
+            let mut output = stdout.lock();
+
+            for (_score, adr) in top_docs {
+                let retrieved_doc = searcher.doc(adr)?;
+                for value in retrieved_doc.get_all(field_path) {
+                    let txt = value.text()
+                        .ok_or_else(|| anyhow!("Not a text value.."))?;
+                    // Propagate write failures (e.g. a closed pipe) instead of
+                    // dropping them.
+                    writeln!(output, "{}", txt).map_err(Error::from)?;
+                }
+            }
+
+            Ok(())
+        },
+
+        (other, _) => {
+            Err(anyhow!("Unknown or missing subcommand '{}'; expected 'index' or 'search'", other))
+        },
+    }
+}
diff --git a/src/schema.rs b/src/schema.rs
new file mode 100644
index 0000000..9a137eb
--- /dev/null
+++ b/src/schema.rs
@@ -0,0 +1,16 @@
+use tantivy::schema::*;
+
+/// Builds the tantivy schema used by the index.
+///
+/// The schema has three text fields, added in this order:
+///
+/// * `path` - `STRING | STORED`
+/// * `ft`   - `STRING | STORED`
+/// * `body` - stored, indexed with the `"default"` tokenizer and
+///   `IndexRecordOption::WithFreqsAndPositions`
+pub fn schema() -> Schema {
+    let indexing = TextFieldIndexing::default()
+        .set_tokenizer("default")
+        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+    let body_options = TextOptions::default()
+        .set_indexing_options(indexing)
+        .set_stored();
+
+    // Keep the registration order stable: path, ft, body.
+    let mut builder = Schema::builder();
+    builder.add_text_field("path", STRING | STORED);
+    builder.add_text_field("ft", STRING | STORED);
+    builder.add_text_field("body", body_options);
+    builder.build()
+}