use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex, OnceLock};
use roaring::RoaringBitmap;
use crate::index::overlay::OverlayView;
use crate::index::segment::MmapSegment;
use crate::path::PathIndex;
/// Upper bound on the number of entries kept in a snapshot's posting bitmap
/// cache. When storing a new key would exceed this size,
/// `IndexSnapshot::store_posting_bitmap` clears the whole cache first.
pub(crate) const POSTING_BITMAP_CACHE_MAX_ENTRIES: usize = 1024;
/// Base-segment data that an [`IndexSnapshot`] holds behind an `Arc`.
pub struct BaseSegments {
/// The base segments. Indexed in parallel with `base_ids`.
pub segments: Vec<MmapSegment>,
/// Per-segment offset: `IndexSnapshot::all_doc_ids` adds a segment-local
/// doc index to `base_ids[i]` to obtain the global doc id of a document in
/// `segments[i]` (a missing entry is treated as 0 there).
pub base_ids: Vec<u32>,
/// NOTE(review): presumably the file path of each base document, `None`
/// when no path is known — not used in this part of the file; confirm.
pub base_doc_paths: Vec<Option<PathBuf>>,
/// NOTE(review): presumably the doc ids associated with each path — not
/// used in this part of the file; confirm.
pub path_doc_ids: HashMap<PathBuf, Vec<u32>>,
}
/// A view of the index combining shared base segments, an overlay, and a
/// delete set, plus lazily populated per-snapshot caches.
pub struct IndexSnapshot {
/// Base segment data, held behind an `Arc`.
pub base: Arc<BaseSegments>,
/// Overlay view; every entry of `overlay.docs` contributes its `doc_id` to
/// `all_doc_ids`.
pub overlay: OverlayView,
/// Global doc ids of base documents that `all_doc_ids` leaves out.
pub delete_set: RoaringBitmap,
/// NOTE(review): path index for this snapshot — semantics not visible in
/// this part of the file; confirm.
pub path_index: PathIndex,
/// NOTE(review): presumably maps a base doc id (by position) to a file id
/// — confirm against callers.
pub base_doc_to_file_id: Arc<Vec<u32>>,
/// NOTE(review): presumably maps an overlay doc id to a file id — confirm
/// against callers.
pub overlay_doc_to_file_id: HashMap<u32, u32>,
// Computed on the first call to `all_doc_ids` and reused afterwards.
all_doc_ids_cache: OnceLock<RoaringBitmap>,
// Created on first use; keyed by gram hash and bounded by
// `POSTING_BITMAP_CACHE_MAX_ENTRIES` (see `store_posting_bitmap`).
posting_bitmap_cache: OnceLock<Mutex<HashMap<u64, Arc<RoaringBitmap>>>>,
/// NOTE(review): threshold consulted by query code outside this part of
/// the file — exact meaning not visible here; confirm.
pub scan_threshold: f64,
}
impl IndexSnapshot {
    /// Returns the base segments backing this snapshot.
    pub fn base_segments(&self) -> &[MmapSegment] {
        &self.base.segments
    }

    /// Returns the per-segment global doc id offsets, parallel to
    /// [`base_segments`](Self::base_segments).
    pub fn segment_base_ids(&self) -> &[u32] {
        &self.base.base_ids
    }

    /// Returns the set of doc ids visible in this snapshot.
    ///
    /// The set contains every base document that is not in `delete_set`,
    /// plus every overlay document (overlay documents are included even if
    /// their id appears in `delete_set`). The bitmap is computed on the
    /// first call and cached for the lifetime of the snapshot.
    pub fn all_doc_ids(&self) -> &RoaringBitmap {
        self.all_doc_ids_cache.get_or_init(|| {
            let mut live = RoaringBitmap::new();

            for (seg_idx, seg) in self.base.segments.iter().enumerate() {
                // An empty segment contributes no ids.
                let Some(last_local) = seg.doc_count.checked_sub(1) else {
                    continue;
                };
                // A segment without a recorded base id starts at 0.
                let first = self.base.base_ids.get(seg_idx).copied().unwrap_or(0);
                // Clamp instead of overflowing: unchecked `base + local`
                // would panic in debug builds and wrap around to bogus low
                // ids in release builds.
                let last = first.saturating_add(last_local);
                // One range insert per segment instead of one insert per doc.
                live.insert_range(first..=last);
            }

            // Drop deleted base docs in a single bitmap operation. This must
            // happen before the overlay docs are added so that they are not
            // filtered by the delete set.
            live -= &self.delete_set;

            for doc in &self.overlay.docs {
                live.insert(doc.doc_id);
            }
            live
        })
    }

    /// Returns the posting bitmap cache, creating it empty on first use.
    fn posting_bitmap_cache(&self) -> &Mutex<HashMap<u64, Arc<RoaringBitmap>>> {
        self.posting_bitmap_cache
            .get_or_init(|| Mutex::new(HashMap::new()))
    }

    /// Locks the posting bitmap cache.
    ///
    /// A poisoned lock is recovered rather than propagated: the map is only
    /// a cache, so whatever entries it holds remain usable.
    fn lock_posting_bitmap_cache(
        &self,
    ) -> std::sync::MutexGuard<'_, HashMap<u64, Arc<RoaringBitmap>>> {
        self.posting_bitmap_cache()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Returns the cached posting bitmap for `gram_hash`, if one is stored.
    pub(crate) fn cached_posting_bitmap(&self, gram_hash: u64) -> Option<Arc<RoaringBitmap>> {
        let cache = self.lock_posting_bitmap_cache();
        cache.get(&gram_hash).cloned()
    }

    /// Stores `bitmap` under `gram_hash` and returns the bitmap that ends up
    /// in the cache.
    ///
    /// If an entry for `gram_hash` already exists it is kept and returned,
    /// and `bitmap` is discarded. If inserting a new key would exceed
    /// [`POSTING_BITMAP_CACHE_MAX_ENTRIES`], the whole cache is cleared
    /// first.
    pub(crate) fn store_posting_bitmap(
        &self,
        gram_hash: u64,
        bitmap: Arc<RoaringBitmap>,
    ) -> Arc<RoaringBitmap> {
        let mut cache = self.lock_posting_bitmap_cache();
        if cache.len() >= POSTING_BITMAP_CACHE_MAX_ENTRIES && !cache.contains_key(&gram_hash) {
            cache.clear();
        }
        // `bitmap` is owned, so move it into the map instead of cloning it.
        Arc::clone(cache.entry(gram_hash).or_insert(bitmap))
    }

    /// Test helper: copies the snapshot's data into a new snapshot whose
    /// caches start out empty.
    #[cfg(test)]
    pub(crate) fn clone_for_test(&self) -> IndexSnapshot {
        IndexSnapshot {
            base: Arc::clone(&self.base),
            overlay: self.overlay.clone(),
            delete_set: self.delete_set.clone(),
            path_index: self.path_index.clone(),
            base_doc_to_file_id: Arc::clone(&self.base_doc_to_file_id),
            overlay_doc_to_file_id: self.overlay_doc_to_file_id.clone(),
            scan_threshold: self.scan_threshold,
            all_doc_ids_cache: OnceLock::new(),
            posting_bitmap_cache: OnceLock::new(),
        }
    }

    /// Test helper: like [`clone_for_test`](Self::clone_for_test), but with
    /// `scan_threshold` replaced by `threshold`.
    #[cfg(test)]
    pub(crate) fn with_scan_threshold(&self, threshold: f64) -> IndexSnapshot {
        IndexSnapshot {
            scan_threshold: threshold,
            ..self.clone_for_test()
        }
    }

    /// Test helper: number of entries in the posting bitmap cache, or 0 if
    /// the cache has not been created yet (it is not created by this call).
    #[cfg(test)]
    pub(crate) fn posting_bitmap_cache_len(&self) -> usize {
        self.posting_bitmap_cache.get().map_or(0, |cache| {
            cache
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner())
                .len()
        })
    }
}
/// Builds an [`IndexSnapshot`] from its parts.
///
/// All arguments are moved into the snapshot unchanged. Both internal caches
/// (the all-doc-ids bitmap and the posting bitmap cache) start out
/// uninitialised and are filled on demand.
pub fn new_snapshot(
    base: Arc<BaseSegments>,
    overlay: crate::index::overlay::OverlayView,
    delete_set: roaring::RoaringBitmap,
    path_index: crate::path::PathIndex,
    base_doc_to_file_id: Arc<Vec<u32>>,
    overlay_doc_to_file_id: HashMap<u32, u32>,
    scan_threshold: f64,
) -> IndexSnapshot {
    // Fresh, empty caches: nothing is computed until first requested.
    let all_doc_ids_cache = OnceLock::new();
    let posting_bitmap_cache = OnceLock::new();

    IndexSnapshot {
        all_doc_ids_cache,
        posting_bitmap_cache,
        scan_threshold,
        base,
        base_doc_to_file_id,
        overlay,
        overlay_doc_to_file_id,
        delete_set,
        path_index,
    }
}