casc_storage/index/
sorted_index.rs

1//! Sorted index implementation for efficient binary search lookups
2
3use crate::types::{ArchiveLocation, EKey};
4use std::collections::BTreeMap;
5
6/// A sorted index that provides O(log n) lookups using binary search
7#[derive(Debug, Clone)]
8pub struct SortedIndex {
9    /// Entries stored in a BTreeMap for automatic sorting and binary search
10    entries: BTreeMap<EKey, ArchiveLocation>,
11    /// Cached sorted keys for range queries
12    sorted_keys: Option<Vec<EKey>>,
13}
14
15impl SortedIndex {
16    /// Create a new empty sorted index
17    pub fn new() -> Self {
18        Self {
19            entries: BTreeMap::new(),
20            sorted_keys: None,
21        }
22    }
23
24    /// Create a sorted index with pre-allocated capacity
25    pub fn with_capacity(_capacity: usize) -> Self {
26        // BTreeMap doesn't support with_capacity, but we keep the API consistent
27        Self::new()
28    }
29
30    /// Insert an entry into the index
31    pub fn insert(&mut self, key: EKey, location: ArchiveLocation) {
32        self.entries.insert(key, location);
33        // Invalidate sorted keys cache
34        self.sorted_keys = None;
35    }
36
37    /// Perform a binary search lookup - O(log n)
38    pub fn lookup(&self, key: &EKey) -> Option<&ArchiveLocation> {
39        self.entries.get(key)
40    }
41
42    /// Get a range of entries between two keys
43    pub fn range(
44        &self,
45        start: &EKey,
46        end: &EKey,
47    ) -> impl Iterator<Item = (&EKey, &ArchiveLocation)> {
48        self.entries.range(start..=end)
49    }
50
51    /// Find the first entry with key >= target
52    pub fn lower_bound(&self, key: &EKey) -> Option<(&EKey, &ArchiveLocation)> {
53        self.entries.range(key..).next()
54    }
55
56    /// Find the last entry with key <= target
57    pub fn upper_bound(&self, key: &EKey) -> Option<(&EKey, &ArchiveLocation)> {
58        self.entries.range(..=key).next_back()
59    }
60
61    /// Get all entries
62    pub fn entries(&self) -> impl Iterator<Item = (&EKey, &ArchiveLocation)> {
63        self.entries.iter()
64    }
65
66    /// Get the number of entries
67    pub fn len(&self) -> usize {
68        self.entries.len()
69    }
70
71    /// Check if the index is empty
72    pub fn is_empty(&self) -> bool {
73        self.entries.is_empty()
74    }
75
76    /// Clear all entries
77    pub fn clear(&mut self) {
78        self.entries.clear();
79        self.sorted_keys = None;
80    }
81
82    /// Get sorted keys (cached)
83    pub fn sorted_keys(&mut self) -> &[EKey] {
84        if self.sorted_keys.is_none() {
85            self.sorted_keys = Some(self.entries.keys().copied().collect());
86        }
87        self.sorted_keys.as_ref().unwrap()
88    }
89
90    /// Perform a bulk insert from an iterator
91    pub fn extend<I>(&mut self, iter: I)
92    where
93        I: IntoIterator<Item = (EKey, ArchiveLocation)>,
94    {
95        self.entries.extend(iter);
96        self.sorted_keys = None;
97    }
98
99    /// Create from a HashMap for migration
100    pub fn from_hashmap(map: std::collections::HashMap<EKey, ArchiveLocation>) -> Self {
101        let mut index = Self::new();
102        index.extend(map);
103        index
104    }
105
106    /// Convert to BTreeMap for compatibility
107    pub fn into_btree_map(self) -> BTreeMap<EKey, ArchiveLocation> {
108        self.entries
109    }
110}
111
112impl Default for SortedIndex {
113    fn default() -> Self {
114        Self::new()
115    }
116}
117
118#[cfg(test)]
119mod tests {
120    use super::*;
121
122    #[test]
123    fn test_sorted_index_operations() {
124        let mut index = SortedIndex::new();
125
126        // Create test keys
127        let key1 = EKey::new([1; 16]);
128        let key2 = EKey::new([2; 16]);
129        let key3 = EKey::new([3; 16]);
130
131        let loc1 = ArchiveLocation {
132            archive_id: 1,
133            offset: 100,
134            size: 50,
135        };
136
137        let loc2 = ArchiveLocation {
138            archive_id: 2,
139            offset: 200,
140            size: 60,
141        };
142
143        let loc3 = ArchiveLocation {
144            archive_id: 3,
145            offset: 300,
146            size: 70,
147        };
148
149        // Insert entries
150        index.insert(key2, loc2);
151        index.insert(key1, loc1);
152        index.insert(key3, loc3);
153
154        // Test lookup
155        assert_eq!(index.lookup(&key1), Some(&loc1));
156        assert_eq!(index.lookup(&key2), Some(&loc2));
157        assert_eq!(index.lookup(&key3), Some(&loc3));
158
159        // Test non-existent key
160        let key4 = EKey::new([4; 16]);
161        assert_eq!(index.lookup(&key4), None);
162
163        // Test size
164        assert_eq!(index.len(), 3);
165
166        // Test range query
167        let range: Vec<_> = index.range(&key1, &key2).collect();
168        assert_eq!(range.len(), 2);
169
170        // Test lower bound
171        let lower = index.lower_bound(&key2);
172        assert!(lower.is_some());
173        assert_eq!(lower.unwrap().0, &key2);
174    }
175
176    #[test]
177    fn test_sorted_order() {
178        let mut index = SortedIndex::new();
179
180        // Insert in random order
181        for i in [5u8, 2, 8, 1, 9, 3, 7, 4, 6] {
182            let mut key_bytes = [0u8; 16];
183            key_bytes[0] = i;
184            let key = EKey::new(key_bytes);
185            let loc = ArchiveLocation {
186                archive_id: i as u16,
187                offset: (i as u64) * 100,
188                size: 50,
189            };
190            index.insert(key, loc);
191        }
192
193        // Verify entries are sorted
194        let keys: Vec<_> = index.entries().map(|(k, _)| *k).collect();
195        for i in 1..keys.len() {
196            assert!(keys[i - 1] < keys[i], "Keys should be in sorted order");
197        }
198    }
199}