From dfc38880b32112f2dda6b6c55c6c55830b0c92e7 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Sun, 18 Apr 2021 10:41:38 +0200 Subject: Move to dedicated types for filetype parsing Signed-off-by: Matthias Beyer --- src/ft/mod.rs | 30 ++++++++++++++++++++++++++++++ src/server.rs | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 src/ft/mod.rs diff --git a/src/ft/mod.rs b/src/ft/mod.rs new file mode 100644 index 0000000..6b91848 --- /dev/null +++ b/src/ft/mod.rs @@ -0,0 +1,30 @@ +use std::path::PathBuf; +use std::path::Path; + +use anyhow::Result; +use crate::Server; + +pub trait FileTypeParser { + fn parse(&self, server: &Server, path: &Path, ext: &str, doc: tantivy::Document) -> Result; +} + + +pub struct TextFileParser; + +impl FileTypeParser for TextFileParser { + fn parse(&self, server: &Server, path: &Path, ext: &str, mut doc: tantivy::Document) -> Result { + let body = std::fs::read_to_string(path)?; + doc.add_text(server.field_body(), body); + Ok(doc) + } +} + + +pub struct MarkdownParser; + +impl FileTypeParser for MarkdownParser { + fn parse(&self, server: &Server, path: &Path, ext: &str, doc: tantivy::Document) -> Result { + unimplemented!() + } +} + diff --git a/src/server.rs b/src/server.rs index bb7a550..61251b6 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,5 +1,6 @@ use std::str::FromStr; use std::path::PathBuf; +use std::path::Path; use anyhow::anyhow; use anyhow::Error; @@ -20,41 +21,57 @@ use fss::SearchResponse; mod config; mod cli; +mod ft; mod schema; mod fss { tonic::include_proto!("fss"); // The string specified here must match the proto package name } +#[derive(getset::CopyGetters)] pub struct Server { index: tantivy::Index, + #[getset(get_copy = "pub")] field_path: tantivy::schema::Field, + + #[getset(get_copy = "pub")] field_ft: tantivy::schema::Field, + + #[getset(get_copy = "pub")] field_body: tantivy::schema::Field, } impl Server { - fn write_file_to_index(&self, filepath: &str) -> Result<()> { - let mut index_writer = self.index.writer(50_000_000)?; - let path = PathBuf::from(filepath); - - let filetype = path.extension() + fn write_file_to_index(&self, filepath: &Path) -> Result<()> { + let ext = filepath + .extension() .map(ToOwned::to_owned) .and_then(|osstr| osstr.to_str().map(|s| s.to_string())) - .ok_or_else(|| anyhow!("Path {} is not UTF8", filepath))?; + .ok_or_else(|| anyhow!("Path {} is not UTF8", filepath.display()))?; let mut doc = tantivy::Document::default(); - doc.add_text(self.field_path, filepath); - doc.add_text(self.field_ft, &filetype); - - doc.add_text(self.field_body, std::fs::read_to_string(path)?); + doc.add_text(self.field_path(), filepath.display().to_string()); + doc.add_text(self.field_ft(), &ext); + let doc = self.parse_for_file(doc, filepath, &ext)?; + let mut index_writer = self.index.writer(50_000_000)?; index_writer.add_document(doc); index_writer.commit()?; Ok(()) } + fn parse_for_file(&self, doc: tantivy::Document, filepath: &Path, ext: &str) -> Result { + use crate::ft::FileTypeParser; + use crate::ft::TextFileParser; + + match ext { + "txt" => Ok(TextFileParser {}), + _ => Err(anyhow!("No parser available for {}", ext)) + }? + .parse(&self, filepath, ext, doc) + } + fn search(&self, query_str: &str) -> Result> { let reader = self.index .reader_builder() @@ -99,7 +116,7 @@ impl fss::fss_server::Fss for Server { } async fn index_file(&self, request: Request) -> Result, Status> { - let error = match self.write_file_to_index(&request.get_ref().path) { + let error = match self.write_file_to_index(request.get_ref().path.as_ref()) { Ok(()) => false, Err(e) => { log::error!("Error writing to index: {} -> {:?}", request.get_ref().path, e); -- cgit v1.2.3