summary refs log tree commit diff stats
path: root/src/collector
diff options
context:
space:
mode:
author Paul Masurel <paul.masurel@gmail.com> 2017-05-15 22:30:18 +0900
committer Paul Masurel <paul.masurel@gmail.com> 2017-05-15 22:30:18 +0900
commit 4c8f9742f882dd1366f4130ac11a5ce4a786180f (patch)
tree af07264eadf34f1dd55898e90ff23388734324f9 /src/collector
parent a23b7a181541f00d0a8b437af8617951526d23a6 (diff)
format
Diffstat (limited to 'src/collector')
-rw-r--r-- src/collector/chained_collector.rs 19
-rw-r--r-- src/collector/count_collector.rs 20
-rw-r--r-- src/collector/mod.rs 49
-rw-r--r-- src/collector/multi_collector.rs 14
-rw-r--r-- src/collector/top_collector.rs 60
5 files changed, 82 insertions, 80 deletions
diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs
index 524ffec..6cc5785 100644
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -7,8 +7,8 @@ use Score;
/// Collector that does nothing.
-/// This is used in the chain Collector and will hopefully
-/// be optimized away by the compiler.
+/// This is used in the chain Collector and will hopefully
+/// be optimized away by the compiler.
pub struct DoNothingCollector;
impl Collector for DoNothingCollector {
#[inline]
@@ -24,10 +24,10 @@ impl Collector for DoNothingCollector {
/// are known at compile time.
pub struct ChainedCollector<Left: Collector, Right: Collector> {
left: Left,
- right: Right
+ right: Right,
}
-impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
+impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
/// Adds a collector
pub fn push<C: Collector>(self, new_collector: &mut C) -> ChainedCollector<Self, &mut C> {
ChainedCollector {
@@ -38,7 +38,10 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
}
impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
- fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
+ fn set_segment(&mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader)
+ -> Result<()> {
try!(self.left.set_segment(segment_local_id, segment));
try!(self.right.set_segment(segment_local_id, segment));
Ok(())
@@ -70,9 +73,7 @@ mod tests {
let mut top_collector = TopCollector::with_limit(2);
let mut count_collector = CountCollector::default();
{
- let mut collectors = chain()
- .push(&mut top_collector)
- .push(&mut count_collector);
+ let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
collectors.collect(1, 0.2);
collectors.collect(2, 0.1);
collectors.collect(3, 0.5);
@@ -80,4 +81,4 @@ mod tests {
assert_eq!(count_collector.count(), 3);
assert!(top_collector.at_capacity());
}
-} \ No newline at end of file
+}
diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs
index ff15abd..bfb17eb 100644
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -6,7 +6,7 @@ use SegmentReader;
use SegmentLocalId;
/// `CountCollector` collector only counts how many
-/// documents match the query.
+/// documents match the query.
pub struct CountCollector {
count: usize,
}
@@ -14,20 +14,18 @@ pub struct CountCollector {
impl CountCollector {
/// Returns the count of documents that were
/// collected.
- pub fn count(&self,) -> usize {
+ pub fn count(&self) -> usize {
self.count
}
}
impl Default for CountCollector {
fn default() -> CountCollector {
- CountCollector {count: 0,
- }
+ CountCollector { count: 0 }
}
}
impl Collector for CountCollector {
-
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
Ok(())
}
@@ -47,11 +45,11 @@ mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
- let mut count_collector = CountCollector::default();
- for doc in 0..1_000_000 {
- count_collector.collect(doc, 1f32);
- }
- count_collector.count()
- });
+ let mut count_collector = CountCollector::default();
+ for doc in 0..1_000_000 {
+ count_collector.collect(doc, 1f32);
+ }
+ count_collector.count()
+ });
}
}
diff --git a/src/collector/mod.rs b/src/collector/mod.rs
index c3964c5..72d5797 100644
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -16,11 +16,11 @@ pub use self::top_collector::TopCollector;
mod chained_collector;
pub use self::chained_collector::chain;
-/// Collectors are in charge of collecting and retaining relevant
+/// Collectors are in charge of collecting and retaining relevant
/// information from the document found and scored by the query.
///
///
-/// For instance,
+/// For instance,
///
/// - keeping track of the top 10 best documents
/// - computing a breakdown over a fast field
@@ -29,7 +29,7 @@ pub use self::chained_collector::chain;
/// Queries are in charge of pushing the `DocSet` to the collector.
///
/// As they work on multiple segments, they first inform
-/// the collector of a change in a segment and then
+/// the collector of a change in a segment and then
/// call the `collect` method to push the document to the collector.
///
/// Temporally, our collector will receive calls
@@ -46,16 +46,22 @@ pub use self::chained_collector::chain;
///
/// Segments are not guaranteed to be visited in any specific order.
pub trait Collector {
- /// `set_segment` is called before beginning to enumerate
+ /// `set_segment` is called before beginning to enumerate
/// on this segment.
- fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()>;
+ fn set_segment(&mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader)
+ -> Result<()>;
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
}
impl<'a, C: Collector> Collector for &'a mut C {
- fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
+ fn set_segment(&mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader)
+ -> Result<()> {
(*self).set_segment(segment_local_id, segment)
}
/// The query pushes the scored document to the collector via this method.
@@ -77,7 +83,7 @@ pub mod tests {
use fastfield::U64FastFieldReader;
use fastfield::FastFieldReader;
use schema::Field;
-
+
/// Stores all of the doc ids.
/// This collector is only used for tests.
/// It is unusable in practise, as it does not store
@@ -90,7 +96,7 @@ pub mod tests {
impl TestCollector {
/// Return the exhalist of documents.
- pub fn docs(self,) -> Vec<DocId> {
+ pub fn docs(self) -> Vec<DocId> {
self.docs
}
}
@@ -106,7 +112,6 @@ pub mod tests {
}
impl Collector for TestCollector {
-
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
self.offset += self.segment_max_doc;
self.segment_max_doc = reader.max_doc();
@@ -117,10 +122,10 @@ pub mod tests {
self.docs.push(doc + self.offset);
}
}
-
-
-
-
+
+
+
+
/// Collects in order all of the fast fields for all of the
/// doc in the `DocSet`
///
@@ -140,11 +145,11 @@ pub mod tests {
}
}
- pub fn vals(self,) -> Vec<u64> {
+ pub fn vals(self) -> Vec<u64> {
self.vals
}
}
-
+
impl Collector for FastFieldTestCollector {
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
@@ -161,12 +166,12 @@ pub mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
- let mut count_collector = CountCollector::default();
- let docs: Vec<u32> = (0..1_000_000).collect();
- for doc in docs {
- count_collector.collect(doc, 1f32);
- }
- count_collector.count()
- });
+ let mut count_collector = CountCollector::default();
+ let docs: Vec<u32> = (0..1_000_000).collect();
+ for doc in docs {
+ count_collector.collect(doc, 1f32);
+ }
+ count_collector.count()
+ });
}
}
diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs
index e5eddc7..c251578 100644
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -7,7 +7,7 @@ use SegmentLocalId;
/// Multicollector makes it possible to collect on more than one collector.
-/// It should only be used for use cases where the Collector types is unknown
+/// It should only be used for use cases where the Collector types is unknown
/// at compile time.
/// If the type of the collectors is known, you should prefer to use `ChainedCollector`.
pub struct MultiCollector<'a> {
@@ -17,15 +17,16 @@ pub struct MultiCollector<'a> {
impl<'a> MultiCollector<'a> {
/// Constructor
pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
- MultiCollector {
- collectors: collectors,
- }
+ MultiCollector { collectors: collectors }
}
}
impl<'a> Collector for MultiCollector<'a> {
- fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
+ fn set_segment(&mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader)
+ -> Result<()> {
for collector in &mut self.collectors {
try!(collector.set_segment(segment_local_id, segment));
}
@@ -52,7 +53,8 @@ mod tests {
let mut top_collector = TopCollector::with_limit(2);
let mut count_collector = CountCollector::default();
{
- let mut collectors = MultiCollector::from(vec!(&mut top_collector, &mut count_collector));
+ let mut collectors = MultiCollector::from(vec![&mut top_collector,
+ &mut count_collector]);
collectors.collect(1, 0.2);
collectors.collect(2, 0.1);
collectors.collect(3, 0.5);
diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs
index 6425eb3..a02141f 100644
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -12,8 +12,7 @@ use Score;
#[derive(Clone, Copy)]
struct GlobalScoredDoc {
score: Score,
- doc_address: DocAddress
-
+ doc_address: DocAddress,
}
impl PartialOrd for GlobalScoredDoc {
@@ -25,10 +24,10 @@ impl PartialOrd for GlobalScoredDoc {
impl Ord for GlobalScoredDoc {
#[inline]
fn cmp(&self, other: &GlobalScoredDoc) -> Ordering {
- other.score.partial_cmp(&self.score)
- .unwrap_or(
- other.doc_address.cmp(&self.doc_address)
- )
+ other
+ .score
+ .partial_cmp(&self.score)
+ .unwrap_or(other.doc_address.cmp(&self.doc_address))
}
}
@@ -53,7 +52,6 @@ pub struct TopCollector {
}
impl TopCollector {
-
/// Creates a top collector, with a number of documents equal to "limit".
///
/// # Panics
@@ -68,9 +66,9 @@ impl TopCollector {
segment_id: 0,
}
}
-
+
/// Returns K best documents sorted in decreasing order.
- ///
+ ///
/// Calling this method triggers the sort.
/// The result of the sort is not cached.
pub fn docs(&self) -> Vec<DocAddress> {
@@ -81,30 +79,27 @@ impl TopCollector {
}
/// Returns K best ScoredDocument sorted in decreasing order.
- ///
+ ///
/// Calling this method triggers the sort.
/// The result of the sort is not cached.
pub fn score_docs(&self) -> Vec<(Score, DocAddress)> {
- let mut scored_docs: Vec<GlobalScoredDoc> = self.heap
- .iter()
- .cloned()
- .collect();
+ let mut scored_docs: Vec<GlobalScoredDoc> = self.heap.iter().cloned().collect();
scored_docs.sort();
- scored_docs.into_iter()
- .map(|GlobalScoredDoc {score, doc_address}| (score, doc_address))
+ scored_docs
+ .into_iter()
+ .map(|GlobalScoredDoc { score, doc_address }| (score, doc_address))
.collect()
}
/// Return true iff at least K documents have gone through
/// the collector.
#[inline]
- pub fn at_capacity(&self, ) -> bool {
+ pub fn at_capacity(&self) -> bool {
self.heap.len() >= self.limit
}
}
impl Collector for TopCollector {
-
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
self.segment_id = segment_id;
Ok(())
@@ -113,17 +108,21 @@ impl Collector for TopCollector {
fn collect(&mut self, doc: DocId, score: Score) {
if self.at_capacity() {
// It's ok to unwrap as long as a limit of 0 is forbidden.
- let limit_doc: GlobalScoredDoc = *self.heap.peek().expect("Top collector with size 0 is forbidden");
+ let limit_doc: GlobalScoredDoc =
+ *self.heap
+ .peek()
+ .expect("Top collector with size 0 is forbidden");
if limit_doc.score < score {
- let mut mut_head = self.heap.peek_mut().expect("Top collector with size 0 is forbidden");
+ let mut mut_head = self.heap
+ .peek_mut()
+ .expect("Top collector with size 0 is forbidden");
mut_head.score = score;
- mut_head.doc_address = DocAddress(self.segment_id, doc);
+ mut_head.doc_address = DocAddress(self.segment_id, doc);
}
- }
- else {
+ } else {
let wrapped_doc = GlobalScoredDoc {
score: score,
- doc_address: DocAddress(self.segment_id, doc)
+ doc_address: DocAddress(self.segment_id, doc),
};
self.heap.push(wrapped_doc);
}
@@ -147,13 +146,12 @@ mod tests {
top_collector.collect(3, 0.2);
top_collector.collect(5, 0.3);
assert!(!top_collector.at_capacity());
- let score_docs: Vec<(Score, DocId)> = top_collector.score_docs()
+ let score_docs: Vec<(Score, DocId)> = top_collector
+ .score_docs()
.into_iter()
.map(|(score, doc_address)| (score, doc_address.doc()))
.collect();
- assert_eq!(score_docs, vec!(
- (0.8, 1), (0.3, 5), (0.2, 3),
- ));
+ assert_eq!(score_docs, vec![(0.8, 1), (0.3, 5), (0.2, 3)]);
}
#[test]
@@ -171,9 +169,7 @@ mod tests {
.into_iter()
.map(|(score, doc_address)| (score, doc_address.doc()))
.collect();
- assert_eq!(score_docs, vec!(
- (0.9, 7), (0.8, 1), (0.3, 5), (0.2, 3)
- ));
+ assert_eq!(score_docs, vec![(0.9, 7), (0.8, 1), (0.3, 5), (0.2, 3)]);
}
{
let docs: Vec<DocId> = top_collector
@@ -181,7 +177,7 @@ mod tests {
.into_iter()
.map(|doc_address| doc_address.doc())
.collect();
- assert_eq!(docs, vec!(7, 1, 5, 3));
+ assert_eq!(docs, vec![7, 1, 5, 3]);
}