diff options
author | Raminder Singh <romi_ssk@yahoo.co.in> | 2019-09-07 16:10:21 +0530 |
---|---|---|
committer | Paul Masurel <paul.masurel@gmail.com> | 2019-09-07 19:40:21 +0900 |
commit | 141f5a93f706efe248d995548460ac42758c2e0d (patch) | |
tree | 73fdd254fe6e76a913a85943c874918d1b25423d | |
parent | df47d55cd24eaf1c145565d95db7eb692a5f7b72 (diff) |
Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507 (#647)
* Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507
* Fixed cargo fmt errors
-rw-r--r-- | src/fastfield/multivalued/writer.rs | 4 |
-rw-r--r-- | src/fastfield/writer.rs | 3 |
-rw-r--r-- | src/postings/postings_writer.rs | 7 |
3 files changed, 8 insertions, 6 deletions
```diff
diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs
index 9899fb1..24750f7 100644
--- a/src/fastfield/multivalued/writer.rs
+++ b/src/fastfield/multivalued/writer.rs
@@ -5,8 +5,8 @@ use crate::postings::UnorderedTermId;
 use crate::schema::{Document, Field};
 use crate::termdict::TermOrdinal;
 use crate::DocId;
+use fnv::FnvHashMap;
 use itertools::Itertools;
-use std::collections::HashMap;
 use std::io;
 
 /// Writer for multi-valued (as in, more than one value per document)
@@ -102,7 +102,7 @@ impl MultiValueIntFastFieldWriter {
     pub fn serialize(
         &self,
         serializer: &mut FastFieldSerializer,
-        mapping_opt: Option<&HashMap<UnorderedTermId, TermOrdinal>>,
+        mapping_opt: Option<&FnvHashMap<UnorderedTermId, TermOrdinal>>,
     ) -> io::Result<()> {
         {
             // writing the offset index
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index de9efe9..3afe6af 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -6,6 +6,7 @@ use crate::fastfield::{BytesFastFieldWriter, FastFieldSerializer};
 use crate::postings::UnorderedTermId;
 use crate::schema::{Cardinality, Document, Field, FieldType, Schema};
 use crate::termdict::TermOrdinal;
+use fnv::FnvHashMap;
 use std::collections::HashMap;
 use std::io;
 
@@ -116,7 +117,7 @@ impl FastFieldsWriter {
     pub fn serialize(
         &self,
         serializer: &mut FastFieldSerializer,
-        mapping: &HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>,
+        mapping: &HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>,
     ) -> io::Result<()> {
         for field_writer in &self.single_value_writers {
             field_writer.serialize(serializer)?;
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index 9b6e007..d916774 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -12,6 +12,7 @@ use crate::tokenizer::TokenStream;
 use crate::tokenizer::{Token, MAX_TOKEN_LEN};
 use crate::DocId;
 use crate::Result;
+use fnv::FnvHashMap;
 use std::collections::HashMap;
 use std::io;
 use std::marker::PhantomData;
@@ -127,12 +128,12 @@ impl MultiFieldPostingsWriter {
     pub fn serialize(
         &self,
         serializer: &mut InvertedIndexSerializer,
-    ) -> Result<HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>> {
+    ) -> Result<HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>> {
         let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> =
             self.term_index.iter().collect();
         term_offsets.sort_unstable_by_key(|&(k, _, _)| k);
 
-        let mut unordered_term_mappings: HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>> =
+        let mut unordered_term_mappings: HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>> =
             HashMap::new();
 
         let field_offsets = make_field_partition(&term_offsets);
@@ -147,7 +148,7 @@ impl MultiFieldPostingsWriter {
             let unordered_term_ids = term_offsets[start..stop]
                 .iter()
                 .map(|&(_, _, bucket)| bucket);
-            let mapping: HashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
+            let mapping: FnvHashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
                 .enumerate()
                 .map(|(term_ord, unord_term_id)| {
                     (unord_term_id as UnorderedTermId, term_ord as TermOrdinal)
```