author | Paul Masurel <paul.masurel@gmail.com> | 2017-12-14 18:23:35 +0900
---|---|---
committer | Paul Masurel <paul.masurel@gmail.com> | 2017-12-14 18:23:35 +0900
commit | f24e5f405ec205b99989554a7cea48cb2f4b9d07 (patch)
tree | 1508a82b18ff0112c338abd271e572c16bd87dd9 /src/tokenizer
parent | 2589be3984a7fe71c38b7da07475a2481cdd4d27 (diff)
NOBUG intellij misc lint
Diffstat (limited to 'src/tokenizer')
-rw-r--r-- | src/tokenizer/japanese_tokenizer.rs | 6
-rw-r--r-- | src/tokenizer/lower_caser.rs | 2
-rw-r--r-- | src/tokenizer/raw_tokenizer.rs | 2
-rw-r--r-- | src/tokenizer/remove_long.rs | 6
-rw-r--r-- | src/tokenizer/simple_tokenizer.rs | 2
-rw-r--r-- | src/tokenizer/stemmer.rs | 4
-rw-r--r-- | src/tokenizer/token_stream_chain.rs | 4
-rw-r--r-- | src/tokenizer/tokenizer_manager.rs | 10 |
8 files changed, 20 insertions, 16 deletions
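Most of the hunks below apply one mechanical cleanup flagged by the IntelliJ lint pass: Rust's field init shorthand (stable since Rust 1.17), which drops the redundant `field: field` repetition whenever a local variable shares the field's name. The last file, `tokenizer_manager.rs`, additionally expands a doc comment. A minimal sketch of the shorthand; the `Token` struct here is a trimmed-down stand-in, not tantivy's actual definition:

```rust
// Trimmed-down stand-in for the `Token` struct touched in the diff.
struct Token {
    offset_from: usize,
    offset_to: usize,
}

fn build(offset_from: usize, offset_to: usize) -> Token {
    // Field init shorthand: `offset_from` alone is equivalent to
    // `offset_from: offset_from` when a local of that name is in scope.
    Token { offset_from, offset_to }
}

fn main() {
    let token = build(0, 5);
    assert_eq!(token.offset_to - token.offset_from, 5);
}
```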
```diff
diff --git a/src/tokenizer/japanese_tokenizer.rs b/src/tokenizer/japanese_tokenizer.rs
index c9981b2..3dfb3ad 100644
--- a/src/tokenizer/japanese_tokenizer.rs
+++ b/src/tokenizer/japanese_tokenizer.rs
@@ -30,15 +30,15 @@ impl<'a> Tokenizer<'a> for JapaneseTokenizer {
             offset_to = offset_from + term.len();
             if term.chars().all(char::is_alphanumeric) {
                 tokens.push(Token {
-                    offset_from: offset_from,
-                    offset_to: offset_to,
+                    offset_from,
+                    offset_to,
                     position: pos,
                     text: term,
                 });
             }
         }
         JapaneseTokenizerStream {
-            tokens: tokens,
+            tokens,
             cursor: Cursor::HasNotStarted,
         }
     }
diff --git a/src/tokenizer/lower_caser.rs b/src/tokenizer/lower_caser.rs
index b7357ee..a15d34b 100644
--- a/src/tokenizer/lower_caser.rs
+++ b/src/tokenizer/lower_caser.rs
@@ -48,6 +48,6 @@ where
     TailTokenStream: TokenStream,
 {
     fn wrap(tail: TailTokenStream) -> LowerCaserTokenStream<TailTokenStream> {
-        LowerCaserTokenStream { tail: tail }
+        LowerCaserTokenStream { tail }
     }
 }
diff --git a/src/tokenizer/raw_tokenizer.rs b/src/tokenizer/raw_tokenizer.rs
index fe36338..039ac6a 100644
--- a/src/tokenizer/raw_tokenizer.rs
+++ b/src/tokenizer/raw_tokenizer.rs
@@ -21,7 +21,7 @@ impl<'a> Tokenizer<'a> for RawTokenizer {
             text: text.to_string(),
         };
         RawTokenStream {
-            token: token,
+            token,
             has_token: true,
         }
     }
diff --git a/src/tokenizer/remove_long.rs b/src/tokenizer/remove_long.rs
index 5637906..94d6b6c 100644
--- a/src/tokenizer/remove_long.rs
+++ b/src/tokenizer/remove_long.rs
@@ -14,7 +14,7 @@ pub struct RemoveLongFilter {
 
 impl RemoveLongFilter {
     // the limit is in bytes of the UTF-8 representation.
     pub fn limit(length_limit: usize) -> RemoveLongFilter {
-        RemoveLongFilter { length_limit: length_limit }
+        RemoveLongFilter { length_limit }
     }
 }
@@ -31,8 +31,8 @@ where
         tail: TailTokenStream,
     ) -> RemoveLongFilterStream<TailTokenStream> {
         RemoveLongFilterStream {
-            token_length_limit: token_length_limit,
-            tail: tail,
+            token_length_limit,
+            tail,
         }
     }
 }
diff --git a/src/tokenizer/simple_tokenizer.rs b/src/tokenizer/simple_tokenizer.rs
index e9d93de..8850c5f 100644
--- a/src/tokenizer/simple_tokenizer.rs
+++ b/src/tokenizer/simple_tokenizer.rs
@@ -18,7 +18,7 @@ impl<'a> Tokenizer<'a> for SimpleTokenizer {
     fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         SimpleTokenStream {
-            text: text,
+            text,
             chars: text.char_indices(),
             token: Token::default(),
         }
     }
diff --git a/src/tokenizer/stemmer.rs b/src/tokenizer/stemmer.rs
index 1c349e0..9a8e7d1 100644
--- a/src/tokenizer/stemmer.rs
+++ b/src/tokenizer/stemmer.rs
@@ -67,8 +67,8 @@ where
         tail: TailTokenStream,
     ) -> StemmerTokenStream<TailTokenStream> {
         StemmerTokenStream {
-            tail: tail,
-            stemmer: stemmer,
+            tail,
+            stemmer,
         }
     }
 }
diff --git a/src/tokenizer/token_stream_chain.rs b/src/tokenizer/token_stream_chain.rs
index eaeccd4..4815936 100644
--- a/src/tokenizer/token_stream_chain.rs
+++ b/src/tokenizer/token_stream_chain.rs
@@ -18,9 +18,9 @@ where
         token_streams: Vec<TTokenStream>,
     ) -> TokenStreamChain<TTokenStream> {
         TokenStreamChain {
-            offsets: offsets,
+            offsets,
             stream_idx: 0,
-            token_streams: token_streams,
+            token_streams,
             position_shift: 0,
             token: Token::default(),
         }
diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs
index 24f611a..54c28f1 100644
--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -17,9 +17,13 @@ use tokenizer::Stemmer;
 ///
 /// By default, it is populated with the following managers.
 ///
-/// * raw : does not process nor tokenize the text.
-/// * default : Chops the text on according to whitespace and
-/// punctuation, removes tokens that are too long, lowercases
+/// * `raw` : does not process nor tokenize the text.
+/// * `default` : Chops the text on according to whitespace and
+/// punctuation, removes tokens that are too long, and lowercases
+/// tokens
+/// * `en_stem` : Like `default`, but also applies stemming on the
+/// resulting tokens. Stemming can improve the recall of your
+/// search engine.
 #[derive(Clone)]
 pub struct TokenizerManager {
     tokenizers: Arc<RwLock<HashMap<String, Box<BoxedTokenizer>>>>,
```
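The `tokenizers: Arc<RwLock<HashMap<String, Box<BoxedTokenizer>>>>` field in the struct above is what lets `TokenizerManager` derive `Clone` cheaply: every clone shares one registry. Below is a simplified, self-contained sketch of that pattern; the `Manager` type, the closure-based `BoxedTokenizer` alias, and the `register`/`get` methods are illustrative stand-ins, not tantivy's actual API:

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

// Stand-in for tantivy's boxed tokenizer trait object: here, just a
// function from text to tokens.
type BoxedTokenizer = Box<dyn Fn(&str) -> Vec<String> + Send + Sync>;

// Mirrors the shape of `TokenizerManager`: cloning the manager clones
// the `Arc`, so all clones share the same registry behind the `RwLock`.
#[derive(Clone, Default)]
struct Manager {
    tokenizers: Arc<RwLock<HashMap<String, Arc<BoxedTokenizer>>>>,
}

impl Manager {
    fn register(&self, name: &str, tokenizer: BoxedTokenizer) {
        self.tokenizers
            .write()
            .expect("lock poisoned")
            .insert(name.to_string(), Arc::new(tokenizer));
    }

    fn get(&self, name: &str) -> Option<Arc<BoxedTokenizer>> {
        self.tokenizers.read().expect("lock poisoned").get(name).cloned()
    }
}

fn main() {
    let manager = Manager::default();
    // `raw` keeps the input as a single token; `default` splits on
    // whitespace and lowercases, loosely echoing the doc comment above.
    manager.register("raw", Box::new(|text: &str| vec![text.to_string()]));
    manager.register(
        "default",
        Box::new(|text: &str| {
            text.split_whitespace()
                .map(|s| s.to_lowercase())
                .collect::<Vec<String>>()
        }),
    );

    let tokenizer = manager.get("default").expect("registered above");
    assert_eq!(tokenizer("Hello World"), vec!["hello", "world"]);
}
```

A real `en_stem` entry would chain a stemming step after this `default` pipeline, which is what the new doc comment describes.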