path: root/src/tokenizer
author    Paul Masurel <paul.masurel@gmail.com>  2018-04-21 20:05:36 +0900
committer Paul Masurel <paul.masurel@gmail.com>  2018-04-21 20:05:36 +0900
commit    78673172d001a0c4c7c73cdd3d9923fc43fc0312 (patch)
tree      e1099b4e48597f7340074d421e8a1f59da2c9f0c /src/tokenizer
parent    175b76f1199e8db4a1515fb40a60187e02b6fb64 (diff)
Cargo fmt
Diffstat (limited to 'src/tokenizer')
-rw-r--r--  src/tokenizer/facet_tokenizer.rs    |  4
-rw-r--r--  src/tokenizer/mod.rs                | 30
-rw-r--r--  src/tokenizer/simple_tokenizer.rs   |  2
-rw-r--r--  src/tokenizer/stemmer.rs            |  2
-rw-r--r--  src/tokenizer/tokenizer.rs          |  1
-rw-r--r--  src/tokenizer/tokenizer_manager.rs  | 10
6 files changed, 24 insertions(+), 25 deletions(-)
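
Every hunk below is a pure reordering produced by `cargo fmt`: rustfmt sorts `use` declarations without changing behavior, which is why the insertion and deletion counts nearly cancel (the one-line difference is a blank line dropped in tokenizer.rs). As a minimal sketch of the transformation, mirroring the stemmer.rs hunk further down, rustfmt rewrites

    // Before `cargo fmt`: imports in the order they were written.
    use std::sync::Arc;
    use super::{Token, TokenFilter, TokenStream};
    use rust_stemmers::{self, Algorithm};

as

    // After `cargo fmt`: `super` imports first, then crate paths in
    // alphabetical order (the ordering visible throughout this commit).
    use super::{Token, TokenFilter, TokenStream};
    use rust_stemmers::{self, Algorithm};
    use std::sync::Arc;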
diff --git a/src/tokenizer/facet_tokenizer.rs b/src/tokenizer/facet_tokenizer.rs
index fd408a4..3218319 100644
--- a/src/tokenizer/facet_tokenizer.rs
+++ b/src/tokenizer/facet_tokenizer.rs
@@ -1,6 +1,6 @@
use super::{Token, TokenStream, Tokenizer};
-use std::str;
use schema::FACET_SEP_BYTE;
+use std::str;
/// The `FacetTokenizer` processes a `Facet` binary representation
/// and emits a token for each of its parents.
@@ -84,10 +84,10 @@ impl<'a> TokenStream for FacetTokenStream<'a> {
#[cfg(test)]
mod tests {
- use tokenizer::{Token, TokenStream, Tokenizer};
use super::FacetTokenizer;
use schema::Facet;
use std::str;
+ use tokenizer::{Token, TokenStream, Tokenizer};
#[test]
fn test_facet_tokenizer() {
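
To make the truncated test concrete, here is a hedged sketch of what it exercises; `encoded_str` is an assumed accessor for the facet's binary representation, and the assertions are not reproduced from the commit:

    // Emit one token for each ancestor of the facet.
    let facet = Facet::from("/category/electronics/phone");
    let mut stream = FacetTokenizer.token_stream(facet.encoded_str()); // encoded_str(): assumption
    while stream.advance() {
        println!("{}", stream.token().text);
    }
    // Expected tokens, one per parent:
    //   /category
    //   /category/electronics
    //   /category/electronics/phone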
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index f885df1..70bf35a 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -128,30 +128,30 @@
//! # }
//! ```
//!
-mod tokenizer;
-mod simple_tokenizer;
+mod alphanum_only;
+mod facet_tokenizer;
+mod japanese_tokenizer;
mod lower_caser;
+mod raw_tokenizer;
mod remove_long;
+mod simple_tokenizer;
mod stemmer;
-mod facet_tokenizer;
-mod tokenizer_manager;
-mod japanese_tokenizer;
mod token_stream_chain;
-mod raw_tokenizer;
-mod alphanum_only;
+mod tokenizer;
+mod tokenizer_manager;
pub use self::alphanum_only::AlphaNumOnlyFilter;
-pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
-pub use self::tokenizer::BoxedTokenizer;
-pub use self::tokenizer_manager::TokenizerManager;
-pub use self::simple_tokenizer::SimpleTokenizer;
-pub use self::raw_tokenizer::RawTokenizer;
-pub(crate) use self::token_stream_chain::TokenStreamChain;
+pub use self::facet_tokenizer::FacetTokenizer;
pub use self::japanese_tokenizer::JapaneseTokenizer;
-pub use self::remove_long::RemoveLongFilter;
pub use self::lower_caser::LowerCaser;
+pub use self::raw_tokenizer::RawTokenizer;
+pub use self::remove_long::RemoveLongFilter;
+pub use self::simple_tokenizer::SimpleTokenizer;
pub use self::stemmer::Stemmer;
-pub use self::facet_tokenizer::FacetTokenizer;
+pub(crate) use self::token_stream_chain::TokenStreamChain;
+pub use self::tokenizer::BoxedTokenizer;
+pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
+pub use self::tokenizer_manager::TokenizerManager;
#[cfg(test)]
mod test {
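
mod.rs only declares the submodules and re-exports their public types, so this hunk too is a pure alphabetical reshuffle (visibility is ignored, which is why the `pub(crate) use` line sorts by its `token_stream_chain` path). Downstream code consumes the re-exports rather than the submodule paths; a brief sketch of a pipeline built from them, with an illustrative length limit:

    // `filter` (from the Tokenizer trait) chains token filters onto a
    // base tokenizer; 40 is an example limit, not a documented default.
    let pipeline = SimpleTokenizer
        .filter(RemoveLongFilter::limit(40))
        .filter(LowerCaser);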
diff --git a/src/tokenizer/simple_tokenizer.rs b/src/tokenizer/simple_tokenizer.rs
index 69f7b09..5f1ca0e 100644
--- a/src/tokenizer/simple_tokenizer.rs
+++ b/src/tokenizer/simple_tokenizer.rs
@@ -1,5 +1,5 @@
-use std::str::CharIndices;
use super::{Token, TokenStream, Tokenizer};
+use std::str::CharIndices;
/// Tokenize the text by splitting on whitespace and punctuation.
#[derive(Clone)]
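
As the doc comment says, `SimpleTokenizer` splits on whitespace and punctuation. A quick sketch of consuming its stream; the field names follow the `Token` struct re-exported from this module:

    let mut stream = SimpleTokenizer.token_stream("Hello, world!");
    while stream.advance() {
        let token = stream.token();
        // Prints "Hello" [0..5], then "world" [7..12]; the comma and
        // the exclamation mark are dropped as punctuation.
        println!("{} [{}..{}]", token.text, token.offset_from, token.offset_to);
    }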
diff --git a/src/tokenizer/stemmer.rs b/src/tokenizer/stemmer.rs
index f2f4a8a..4c91bfb 100644
--- a/src/tokenizer/stemmer.rs
+++ b/src/tokenizer/stemmer.rs
@@ -1,6 +1,6 @@
-use std::sync::Arc;
use super::{Token, TokenFilter, TokenStream};
use rust_stemmers::{self, Algorithm};
+use std::sync::Arc;
/// `Stemmer` token filter. Currently only English is supported.
/// Tokens are expected to be lowercased beforehand.
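
Because the stemmer assumes lowercased input, it normally sits after a `LowerCaser` in the pipeline. A minimal sketch, assuming `Stemmer::new()` selects the English algorithm from `rust_stemmers`:

    let pipeline = SimpleTokenizer
        .filter(LowerCaser)
        .filter(Stemmer::new()); // English, per the doc comment above
    // "Running" is lowercased to "running", then stemmed to "run".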
diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs
index 6fcc639..104cd0e 100644
--- a/src/tokenizer/tokenizer.rs
+++ b/src/tokenizer/tokenizer.rs
@@ -1,6 +1,5 @@
/// The tokenizer module contains all of the tools used to process
/// text in `tantivy`.
-
use std::borrow::{Borrow, BorrowMut};
use tokenizer::TokenStreamChain;
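
tokenizer.rs defines the core abstractions (`Token`, `Tokenizer`, `TokenStream`, `TokenFilter`) that the rest of the module builds on. A sketch of the pull-style `TokenStream` contract, assuming tantivy's `advance`/`token` methods:

    // advance() steps to the next token, returning false once the
    // stream is exhausted; token() borrows the current token.
    fn print_all<T: TokenStream>(mut stream: T) {
        while stream.advance() {
            let t = stream.token();
            println!("pos={} text={}", t.position, t.text);
        }
    }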
diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs
index bbc141c..cbb46af 100644
--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -1,14 +1,14 @@
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
-use tokenizer::BoxedTokenizer;
-use tokenizer::Tokenizer;
use tokenizer::tokenizer::box_tokenizer;
-use tokenizer::RawTokenizer;
-use tokenizer::SimpleTokenizer;
+use tokenizer::BoxedTokenizer;
use tokenizer::JapaneseTokenizer;
-use tokenizer::RemoveLongFilter;
use tokenizer::LowerCaser;
+use tokenizer::RawTokenizer;
+use tokenizer::RemoveLongFilter;
+use tokenizer::SimpleTokenizer;
use tokenizer::Stemmer;
+use tokenizer::Tokenizer;
/// The tokenizer manager serves as a store for
/// all of the pre-configured tokenizer pipelines.
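
The imports above feed the default registry: `box_tokenizer` erases each pipeline's concrete type so that differently-typed pipelines can share one `HashMap` behind the `Arc<RwLock<...>>`. A hedged sketch of typical use; the registered names and the `get` return type are assumptions based on the tokenizers imported here:

    let manager = TokenizerManager::default();
    manager.register(
        "custom_en", // hypothetical name
        SimpleTokenizer
            .filter(RemoveLongFilter::limit(40))
            .filter(LowerCaser)
            .filter(Stemmer::new()),
    );
    let en_stem = manager.get("en_stem"); // assumed pre-registered pipeline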