casc_storage/index/
combined_index.rs1use crate::types::{ArchiveLocation, EKey};
4use dashmap::DashMap;
5use std::collections::BTreeMap;
6use std::sync::Arc;
7use tracing::{debug, trace};
8
9pub struct CombinedIndex {
11 bucket_indices: DashMap<u8, BTreeMap<EKey, ArchiveLocation>>,
13 global_index: DashMap<EKey, u8>,
15 #[allow(dead_code)]
17 bloom_filter: Option<Arc<BloomFilter>>,
18}
19
20impl CombinedIndex {
21 pub fn new() -> Self {
23 Self {
24 bucket_indices: DashMap::new(),
25 global_index: DashMap::new(),
26 bloom_filter: None,
27 }
28 }
29
30 pub fn insert(&self, ekey: EKey, location: ArchiveLocation) {
32 let bucket = ekey.bucket_index();
33
34 self.bucket_indices
36 .entry(bucket)
37 .or_default()
38 .insert(ekey, location);
39
40 self.global_index.insert(ekey, bucket);
42
43 trace!("Inserted {} into bucket {:02x}", ekey, bucket);
44 }
45
46 pub fn lookup(&self, ekey: &EKey) -> Option<ArchiveLocation> {
48 if let Some(bucket_ref) = self.global_index.get(ekey) {
50 let bucket = *bucket_ref;
51
52 if let Some(bucket_index) = self.bucket_indices.get(&bucket) {
54 return bucket_index.get(ekey).copied();
55 }
56 }
57
58 let computed_bucket = ekey.bucket_index();
60 if let Some(bucket_index) = self.bucket_indices.get(&computed_bucket) {
61 if let Some(location) = bucket_index.get(ekey) {
62 self.global_index.insert(*ekey, computed_bucket);
64 return Some(*location);
65 }
66 }
67
68 debug!("Falling back to full search for {}", ekey);
70 for bucket_ref in self.bucket_indices.iter() {
71 if let Some(location) = bucket_ref.value().get(ekey) {
72 self.global_index.insert(*ekey, *bucket_ref.key());
74 return Some(*location);
75 }
76 }
77
78 None
79 }
80
81 pub fn lookup_batch(&self, ekeys: &[EKey]) -> Vec<Option<ArchiveLocation>> {
83 ekeys.iter().map(|ekey| self.lookup(ekey)).collect()
84 }
85
86 pub fn get_bucket(&self, bucket: u8) -> Option<Vec<(EKey, ArchiveLocation)>> {
88 self.bucket_indices
89 .get(&bucket)
90 .map(|index| index.iter().map(|(k, v)| (*k, *v)).collect())
91 }
92
93 pub fn total_entries(&self) -> usize {
95 self.bucket_indices
96 .iter()
97 .map(|bucket| bucket.value().len())
98 .sum()
99 }
100
101 pub fn bucket_count(&self) -> usize {
103 self.bucket_indices.len()
104 }
105
106 pub fn clear(&self) {
108 self.bucket_indices.clear();
109 self.global_index.clear();
110 }
111
112 pub fn rebuild_global_index(&self) {
114 self.global_index.clear();
115
116 for bucket_ref in self.bucket_indices.iter() {
117 let bucket = *bucket_ref.key();
118 for ekey in bucket_ref.value().keys() {
119 self.global_index.insert(*ekey, bucket);
120 }
121 }
122
123 debug!(
124 "Rebuilt global index with {} entries",
125 self.global_index.len()
126 );
127 }
128
129 pub fn stats(&self) -> IndexStats {
131 let mut min_bucket_size = usize::MAX;
132 let mut max_bucket_size = 0;
133 let mut total_entries = 0;
134
135 for bucket_ref in self.bucket_indices.iter() {
136 let size = bucket_ref.value().len();
137 min_bucket_size = min_bucket_size.min(size);
138 max_bucket_size = max_bucket_size.max(size);
139 total_entries += size;
140 }
141
142 IndexStats {
143 total_entries,
144 bucket_count: self.bucket_indices.len(),
145 min_bucket_size: if min_bucket_size == usize::MAX {
146 0
147 } else {
148 min_bucket_size
149 },
150 max_bucket_size,
151 avg_bucket_size: if self.bucket_indices.is_empty() {
152 0.0
153 } else {
154 total_entries as f64 / self.bucket_indices.len() as f64
155 },
156 global_index_size: self.global_index.len(),
157 }
158 }
159}
160
161impl Default for CombinedIndex {
162 fn default() -> Self {
163 Self::new()
164 }
165}
166
167#[derive(Debug, Clone)]
169pub struct IndexStats {
170 pub total_entries: usize,
171 pub bucket_count: usize,
172 pub min_bucket_size: usize,
173 pub max_bucket_size: usize,
174 pub avg_bucket_size: f64,
175 pub global_index_size: usize,
176}
177
178struct BloomFilter {
180 }
182
183#[cfg(test)]
184mod tests {
185 use super::*;
186
187 #[test]
188 fn test_combined_index_lookup() {
189 let index = CombinedIndex::new();
190
191 let key1 = EKey::new([0x10; 16]);
193 let key2 = EKey::new([0x20; 16]);
194 let key3 = EKey::new([0x30; 16]);
195
196 let loc1 = ArchiveLocation {
197 archive_id: 1,
198 offset: 100,
199 size: 50,
200 };
201
202 let loc2 = ArchiveLocation {
203 archive_id: 2,
204 offset: 200,
205 size: 60,
206 };
207
208 index.insert(key1, loc1);
210 index.insert(key2, loc2);
211
212 assert_eq!(index.lookup(&key1), Some(loc1));
214 assert_eq!(index.lookup(&key2), Some(loc2));
215 assert_eq!(index.lookup(&key3), None);
216
217 let stats = index.stats();
219 assert_eq!(stats.total_entries, 2);
220 assert_eq!(stats.global_index_size, 2);
221 }
222
223 #[test]
224 fn test_batch_lookup() {
225 let index = CombinedIndex::new();
226
227 for i in 0..10u8 {
229 let mut key_bytes = [0u8; 16];
230 key_bytes[0] = i;
231 let key = EKey::new(key_bytes);
232 let loc = ArchiveLocation {
233 archive_id: i as u16,
234 offset: (i as u64) * 100,
235 size: 50,
236 };
237 index.insert(key, loc);
238 }
239
240 let keys: Vec<_> = (0..5u8)
242 .map(|i| {
243 let mut key_bytes = [0u8; 16];
244 key_bytes[0] = i;
245 EKey::new(key_bytes)
246 })
247 .collect();
248
249 let results = index.lookup_batch(&keys);
250 assert_eq!(results.len(), 5);
251 assert!(results.iter().all(|r| r.is_some()));
252 }
253}