use super::SegmentComponent; use census::{Inventory, TrackedObject}; use core::SegmentId; use indexer::Opstamp; use serde; use std::collections::HashSet; use std::fmt; use std::path::PathBuf; lazy_static! { static ref INVENTORY: Inventory = { Inventory::new() }; } #[derive(Clone, Debug, Serialize, Deserialize)] struct DeleteMeta { num_deleted_docs: u32, opstamp: Opstamp, } /// `SegmentMeta` contains simple meta information about a segment. /// /// For instance the number of docs it contains, /// how many are deleted, etc. #[derive(Clone)] pub struct SegmentMeta { tracked: TrackedObject, } impl fmt::Debug for SegmentMeta { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { self.tracked.fmt(f) } } impl serde::Serialize for SegmentMeta { fn serialize( &self, serializer: S, ) -> Result<::Ok, ::Error> where S: serde::Serializer, { self.tracked.serialize(serializer) } } impl<'a> serde::Deserialize<'a> for SegmentMeta { fn deserialize(deserializer: D) -> Result>::Error> where D: serde::Deserializer<'a>, { let inner = InnerSegmentMeta::deserialize(deserializer)?; let tracked = INVENTORY.track(inner); Ok(SegmentMeta { tracked }) } } impl SegmentMeta { /// Lists all living `SegmentMeta` object at the time of the call. pub fn all() -> Vec { INVENTORY .list() .into_iter() .map(|inner| SegmentMeta { tracked: inner }) .collect::>() } /// Creates a new `SegmentMeta` object. #[doc(hidden)] pub fn new(segment_id: SegmentId, max_doc: u32) -> SegmentMeta { let inner = InnerSegmentMeta { segment_id, max_doc, deletes: None, }; SegmentMeta { tracked: INVENTORY.track(inner), } } /// Returns the segment id. pub fn id(&self) -> SegmentId { self.tracked.segment_id } /// Returns the number of deleted documents. pub fn num_deleted_docs(&self) -> u32 { self.tracked .deletes .as_ref() .map(|delete_meta| delete_meta.num_deleted_docs) .unwrap_or(0u32) } /// Returns the list of files that /// are required for the segment meta. /// /// This is useful as the way tantivy removes files /// is by removing all files that have been created by tantivy /// and are not used by any segment anymore. pub fn list_files(&self) -> HashSet { SegmentComponent::iterator() .map(|component| self.relative_path(*component)) .collect::>() } /// Returns the relative path of a component of our segment. /// /// It just joins the segment id with the extension /// associated to a segment component. pub fn relative_path(&self, component: SegmentComponent) -> PathBuf { let mut path = self.id().uuid_string(); path.push_str(&*match component { SegmentComponent::POSTINGS => ".idx".to_string(), SegmentComponent::POSITIONS => ".pos".to_string(), SegmentComponent::POSITIONSSKIP => ".posidx".to_string(), SegmentComponent::TERMS => ".term".to_string(), SegmentComponent::STORE => ".store".to_string(), SegmentComponent::FASTFIELDS => ".fast".to_string(), SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(), SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)), }); PathBuf::from(path) } /// Return the highest doc id + 1 /// /// If there are no deletes, then num_docs = max_docs /// and all the doc ids contains in this segment /// are exactly (0..max_doc). pub fn max_doc(&self) -> u32 { self.tracked.max_doc } /// Return the number of documents in the segment. pub fn num_docs(&self) -> u32 { self.max_doc() - self.num_deleted_docs() } /// Returns the opstamp of the last delete operation /// taken in account in this segment. pub fn delete_opstamp(&self) -> Option { self.tracked .deletes .as_ref() .map(|delete_meta| delete_meta.opstamp) } /// Returns true iff the segment meta contains /// delete information. pub fn has_deletes(&self) -> bool { self.num_deleted_docs() > 0 } #[doc(hidden)] pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> SegmentMeta { let delete_meta = DeleteMeta { num_deleted_docs, opstamp, }; let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta { segment_id: inner_meta.segment_id, max_doc: inner_meta.max_doc, deletes: Some(delete_meta), }); SegmentMeta { tracked } } } #[derive(Clone, Debug, Serialize, Deserialize)] struct InnerSegmentMeta { segment_id: SegmentId, max_doc: u32, deletes: Option, }