summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Raminder Singh <romi_ssk@yahoo.co.in> 2019-09-07 16:10:21 +0530
committer Paul Masurel <paul.masurel@gmail.com> 2019-09-07 19:40:21 +0900
commit 141f5a93f706efe248d995548460ac42758c2e0d (patch)
tree 73fdd254fe6e76a913a85943c874918d1b25423d
parent df47d55cd24eaf1c145565d95db7eb692a5f7b72 (diff)
Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507 (#647)
* Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507
* Fixed cargo fmt errors
-rw-r--r-- src/fastfield/multivalued/writer.rs 4
-rw-r--r-- src/fastfield/writer.rs 3
-rw-r--r-- src/postings/postings_writer.rs 7
3 files changed, 8 insertions, 6 deletions
diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs
index 9899fb1..24750f7 100644
--- a/src/fastfield/multivalued/writer.rs
+++ b/src/fastfield/multivalued/writer.rs
@@ -5,8 +5,8 @@ use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
use crate::termdict::TermOrdinal;
use crate::DocId;
+use fnv::FnvHashMap;
use itertools::Itertools;
-use std::collections::HashMap;
use std::io;
/// Writer for multi-valued (as in, more than one value per document)
@@ -102,7 +102,7 @@ impl MultiValueIntFastFieldWriter {
pub fn serialize(
&self,
serializer: &mut FastFieldSerializer,
- mapping_opt: Option<&HashMap<UnorderedTermId, TermOrdinal>>,
+ mapping_opt: Option<&FnvHashMap<UnorderedTermId, TermOrdinal>>,
) -> io::Result<()> {
{
// writing the offset index
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index de9efe9..3afe6af 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -6,6 +6,7 @@ use crate::fastfield::{BytesFastFieldWriter, FastFieldSerializer};
use crate::postings::UnorderedTermId;
use crate::schema::{Cardinality, Document, Field, FieldType, Schema};
use crate::termdict::TermOrdinal;
+use fnv::FnvHashMap;
use std::collections::HashMap;
use std::io;
@@ -116,7 +117,7 @@ impl FastFieldsWriter {
pub fn serialize(
&self,
serializer: &mut FastFieldSerializer,
- mapping: &HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>,
+ mapping: &HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>,
) -> io::Result<()> {
for field_writer in &self.single_value_writers {
field_writer.serialize(serializer)?;
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index 9b6e007..d916774 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -12,6 +12,7 @@ use crate::tokenizer::TokenStream;
use crate::tokenizer::{Token, MAX_TOKEN_LEN};
use crate::DocId;
use crate::Result;
+use fnv::FnvHashMap;
use std::collections::HashMap;
use std::io;
use std::marker::PhantomData;
@@ -127,12 +128,12 @@ impl MultiFieldPostingsWriter {
pub fn serialize(
&self,
serializer: &mut InvertedIndexSerializer,
- ) -> Result<HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>> {
+ ) -> Result<HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>> {
let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> =
self.term_index.iter().collect();
term_offsets.sort_unstable_by_key(|&(k, _, _)| k);
- let mut unordered_term_mappings: HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>> =
+ let mut unordered_term_mappings: HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>> =
HashMap::new();
let field_offsets = make_field_partition(&term_offsets);
@@ -147,7 +148,7 @@ impl MultiFieldPostingsWriter {
let unordered_term_ids = term_offsets[start..stop]
.iter()
.map(|&(_, _, bucket)| bucket);
- let mapping: HashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
+ let mapping: FnvHashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
.enumerate()
.map(|(term_ord, unord_term_id)| {
(unord_term_id as UnorderedTermId, term_ord as TermOrdinal)