use crate::types::{ArchiveLocation, EKey};
use dashmap::DashMap;
use std::collections::BTreeMap;
use std::sync::Arc;
use tracing::{debug, trace};
pub struct CombinedIndex {
bucket_indices: DashMap<u8, BTreeMap<EKey, ArchiveLocation>>,
global_index: DashMap<EKey, u8>,
#[allow(dead_code)]
bloom_filter: Option<Arc<BloomFilter>>,
}
impl CombinedIndex {
pub fn new() -> Self {
Self {
bucket_indices: DashMap::new(),
global_index: DashMap::new(),
bloom_filter: None,
}
}
pub fn insert(&self, ekey: EKey, location: ArchiveLocation) {
let bucket = ekey.bucket_index();
self.bucket_indices
.entry(bucket)
.or_default()
.insert(ekey, location);
self.global_index.insert(ekey, bucket);
trace!("Inserted {} into bucket {:02x}", ekey, bucket);
}
pub fn lookup(&self, ekey: &EKey) -> Option<ArchiveLocation> {
if let Some(bucket_ref) = self.global_index.get(ekey) {
let bucket = *bucket_ref;
if let Some(bucket_index) = self.bucket_indices.get(&bucket) {
return bucket_index.get(ekey).copied();
}
}
let computed_bucket = ekey.bucket_index();
if let Some(bucket_index) = self.bucket_indices.get(&computed_bucket) {
if let Some(location) = bucket_index.get(ekey) {
self.global_index.insert(*ekey, computed_bucket);
return Some(*location);
}
}
debug!("Falling back to full search for {}", ekey);
for bucket_ref in self.bucket_indices.iter() {
if let Some(location) = bucket_ref.value().get(ekey) {
self.global_index.insert(*ekey, *bucket_ref.key());
return Some(*location);
}
}
None
}
pub fn lookup_batch(&self, ekeys: &[EKey]) -> Vec<Option<ArchiveLocation>> {
ekeys.iter().map(|ekey| self.lookup(ekey)).collect()
}
pub fn get_bucket(&self, bucket: u8) -> Option<Vec<(EKey, ArchiveLocation)>> {
self.bucket_indices
.get(&bucket)
.map(|index| index.iter().map(|(k, v)| (*k, *v)).collect())
}
pub fn total_entries(&self) -> usize {
self.bucket_indices
.iter()
.map(|bucket| bucket.value().len())
.sum()
}
pub fn bucket_count(&self) -> usize {
self.bucket_indices.len()
}
pub fn clear(&self) {
self.bucket_indices.clear();
self.global_index.clear();
}
pub fn rebuild_global_index(&self) {
self.global_index.clear();
for bucket_ref in self.bucket_indices.iter() {
let bucket = *bucket_ref.key();
for ekey in bucket_ref.value().keys() {
self.global_index.insert(*ekey, bucket);
}
}
debug!(
"Rebuilt global index with {} entries",
self.global_index.len()
);
}
pub fn stats(&self) -> IndexStats {
let mut min_bucket_size = usize::MAX;
let mut max_bucket_size = 0;
let mut total_entries = 0;
for bucket_ref in self.bucket_indices.iter() {
let size = bucket_ref.value().len();
min_bucket_size = min_bucket_size.min(size);
max_bucket_size = max_bucket_size.max(size);
total_entries += size;
}
IndexStats {
total_entries,
bucket_count: self.bucket_indices.len(),
min_bucket_size: if min_bucket_size == usize::MAX {
0
} else {
min_bucket_size
},
max_bucket_size,
avg_bucket_size: if self.bucket_indices.is_empty() {
0.0
} else {
total_entries as f64 / self.bucket_indices.len() as f64
},
global_index_size: self.global_index.len(),
}
}
}
impl Default for CombinedIndex {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone)]
pub struct IndexStats {
pub total_entries: usize,
pub bucket_count: usize,
pub min_bucket_size: usize,
pub max_bucket_size: usize,
pub avg_bucket_size: f64,
pub global_index_size: usize,
}
struct BloomFilter {
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_combined_index_lookup() {
let index = CombinedIndex::new();
let key1 = EKey::new([0x10; 16]);
let key2 = EKey::new([0x20; 16]);
let key3 = EKey::new([0x30; 16]);
let loc1 = ArchiveLocation {
archive_id: 1,
offset: 100,
size: 50,
};
let loc2 = ArchiveLocation {
archive_id: 2,
offset: 200,
size: 60,
};
index.insert(key1, loc1);
index.insert(key2, loc2);
assert_eq!(index.lookup(&key1), Some(loc1));
assert_eq!(index.lookup(&key2), Some(loc2));
assert_eq!(index.lookup(&key3), None);
let stats = index.stats();
assert_eq!(stats.total_entries, 2);
assert_eq!(stats.global_index_size, 2);
}
#[test]
fn test_batch_lookup() {
let index = CombinedIndex::new();
for i in 0..10u8 {
let mut key_bytes = [0u8; 16];
key_bytes[0] = i;
let key = EKey::new(key_bytes);
let loc = ArchiveLocation {
archive_id: i as u16,
offset: (i as u64) * 100,
size: 50,
};
index.insert(key, loc);
}
let keys: Vec<_> = (0..5u8)
.map(|i| {
let mut key_bytes = [0u8; 16];
key_bytes[0] = i;
EKey::new(key_bytes)
})
.collect();
let results = index.lookup_batch(&keys);
assert_eq!(results.len(), 5);
assert!(results.iter().all(|r| r.is_some()));
}
}