summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matthias Beyer <mail@beyermatthias.de> 2021-04-18 10:41:38 +0200
committer Matthias Beyer <mail@beyermatthias.de> 2021-04-18 11:00:19 +0200
commit dfc38880b32112f2dda6b6c55c6c55830b0c92e7 (patch)
tree e79c652eb48b78c9a22e10d61fadd05335cc34f5
parent 8dfd21ed430e9407880a5fc640b3351ccf34db5c (diff)
download fss-dfc38880b32112f2dda6b6c55c6c55830b0c92e7.tar.gz
fss-dfc38880b32112f2dda6b6c55c6c55830b0c92e7.tar.xz
Move to dedicated types for filetype parsing
Signed-off-by: Matthias Beyer <mail@beyermatthias.de>
-rw-r--r-- src/ft/mod.rs 30
-rw-r--r-- src/server.rs 39
2 files changed, 58 insertions, 11 deletions
diff --git a/src/ft/mod.rs b/src/ft/mod.rs
new file mode 100644
index 0000000..6b91848
--- /dev/null
+++ b/src/ft/mod.rs
@@ -0,0 +1,30 @@
+use std::path::PathBuf;
+use std::path::Path;
+
+use anyhow::Result;
+use crate::Server;
+
+pub trait FileTypeParser { // Per-filetype hook that fills a tantivy document from a file on disk.
+ fn parse(&self, server: &Server, path: &Path, ext: &str, doc: tantivy::Document) -> Result<tantivy::Document>; // Takes `doc` by value and returns it (possibly extended) or an error; `server` supplies the schema field handles.
+}
+
+
+pub struct TextFileParser; // Stateless parser that indexes a file's text content as-is.
+
+impl FileTypeParser for TextFileParser {
+ fn parse(&self, server: &Server, path: &Path, ext: &str, mut doc: tantivy::Document) -> Result<tantivy::Document> { // NOTE(review): `ext` is not used by this implementation.
+ let body = std::fs::read_to_string(path)?; // Reads the whole file into memory; errors on I/O failure or non-UTF-8 content.
+ doc.add_text(server.field_body(), body); // Stores the content under the server's body field.
+ Ok(doc)
+ }
+}
+
+
+pub struct MarkdownParser; // Placeholder type for a Markdown parser; no parsing logic exists yet.
+
+impl FileTypeParser for MarkdownParser {
+ fn parse(&self, server: &Server, path: &Path, ext: &str, doc: tantivy::Document) -> Result<tantivy::Document> { // Stub: no argument is used.
+ unimplemented!() // Panics if called instead of returning an `Err`.
+ }
+}
+
diff --git a/src/server.rs b/src/server.rs
index bb7a550..61251b6 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -1,5 +1,6 @@
use std::str::FromStr;
use std::path::PathBuf;
+use std::path::Path;
use anyhow::anyhow;
use anyhow::Error;
@@ -20,41 +21,57 @@ use fss::SearchResponse;
mod config;
mod cli;
+mod ft;
mod schema;
mod fss {
tonic::include_proto!("fss"); // The string specified here must match the proto package name
}
+#[derive(getset::CopyGetters)] // Generates by-value getters for the fields annotated with `get_copy` below.
pub struct Server {
index: tantivy::Index, // No getter is requested for the index itself.
+ #[getset(get_copy = "pub")]
field_path: tantivy::schema::Field, // Field that receives the file path as text.
+
+ #[getset(get_copy = "pub")]
field_ft: tantivy::schema::Field, // Field that receives the file extension as text.
+
+ #[getset(get_copy = "pub")]
field_body: tantivy::schema::Field, // Field that receives the file content as text.
}
impl Server {
- fn write_file_to_index(&self, filepath: &str) -> Result<()> {
- let mut index_writer = self.index.writer(50_000_000)?;
- let path = PathBuf::from(filepath);
-
- let filetype = path.extension()
+ fn write_file_to_index(&self, filepath: &Path) -> Result<()> { // Indexes one file: path, extension and parsed content go into a single document that is committed immediately.
+ let ext = filepath
+ .extension()
.map(ToOwned::to_owned)
.and_then(|osstr| osstr.to_str().map(|s| s.to_string()))
- .ok_or_else(|| anyhow!("Path {} is not UTF8", filepath))?;
+ .ok_or_else(|| anyhow!("Path {} is not UTF8", filepath.display()))?; // NOTE(review): this error is also produced when the path has no extension at all, not only for non-UTF-8 extensions.
let mut doc = tantivy::Document::default();
- doc.add_text(self.field_path, filepath);
- doc.add_text(self.field_ft, &filetype);
-
- doc.add_text(self.field_body, std::fs::read_to_string(path)?);
+ doc.add_text(self.field_path(), filepath.display().to_string()); // The path is stored in its `display()` form.
+ doc.add_text(self.field_ft(), &ext);
+ let doc = self.parse_for_file(doc, filepath, &ext)?; // Content extraction is delegated to the parser chosen by extension; errors return before any writer is created.
+ let mut index_writer = self.index.writer(50_000_000)?; // A fresh writer is created on every call.
index_writer.add_document(doc);
index_writer.commit()?; // One commit per indexed file.
Ok(())
}
+ fn parse_for_file(&self, doc: tantivy::Document, filepath: &Path, ext: &str) -> Result<tantivy::Document> { // Selects a parser by file extension and runs it on `doc`.
+ use crate::ft::FileTypeParser;
+ use crate::ft::TextFileParser;
+
+ match ext {
+ "txt" => Ok(TextFileParser {}), // Only the "txt" extension has a parser here; the match is an exact, case-sensitive string comparison.
+ _ => Err(anyhow!("No parser available for {}", ext)) // Every other extension is reported as an error.
+ }? // Returns early with the error before any parsing happens.
+ .parse(&self, filepath, ext, doc) // NOTE(review): `&self` is `&&Server` here and relies on deref coercion; plain `self` would do.
+ }
+
fn search(&self, query_str: &str) -> Result<Vec<PathBuf>> {
let reader = self.index
.reader_builder()
@@ -99,7 +116,7 @@ impl fss::fss_server::Fss for Server {
}
async fn index_file(&self, request: Request<IndexFileRequest>) -> Result<Response<IndexFileReply>, Status> {
- let error = match self.write_file_to_index(&request.get_ref().path) {
+ let error = match self.write_file_to_index(request.get_ref().path.as_ref()) {
Ok(()) => false,
Err(e) => {
log::error!("Error writing to index: {} -> {:?}", request.get_ref().path, e);