pgm_extra/index/owned/
dynamic.rs

1//! Dynamic PGM-Index supporting insertions and deletions.
2//!
3//! This index owns its data and supports modifications with automatic rebuilding.
4
5use alloc::vec::Vec;
6use core::ops::RangeBounds;
7use std::collections::BTreeSet;
8
9use crate::error::Error;
10use crate::index::external::Static;
11use crate::index::key::Indexable;
12
13/// A dynamic PGM-Index that supports insertions and deletions.
14///
15/// This index owns its data and maintains insert/delete buffers that are
16/// periodically merged into the main index. When the buffers exceed a
17/// threshold, the index is automatically rebuilt.
18///
19/// # Example
20///
21/// ```
22/// use pgm_extra::index::owned::Dynamic;
23///
24/// let mut index: Dynamic<u64> = Dynamic::new(16, 4);
25///
26/// index.insert(5);
27/// index.insert(3);
28/// index.insert(7);
29///
30/// assert!(index.contains(&3));
31/// assert!(index.contains(&5));
32/// assert!(!index.contains(&4));
33/// ```
34#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35#[cfg_attr(
36    feature = "serde",
37    serde(
38        bound = "T: serde::Serialize + serde::de::DeserializeOwned, T::Key: serde::Serialize + serde::de::DeserializeOwned"
39    )
40)]
41pub struct Dynamic<T: Indexable + Ord>
42where
43    T::Key: Ord,
44{
45    base_index: Option<Static<T>>,
46    base_data: Vec<T>,
47    delete_buffer: BTreeSet<T>,
48    epsilon: usize,
49    epsilon_recursive: usize,
50    insert_buffer: BTreeSet<T>,
51    rebuild_threshold: usize,
52}
53
54impl<T: Indexable + Ord + std::fmt::Debug> std::fmt::Debug for Dynamic<T>
55where
56    T::Key: Ord,
57{
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        f.debug_struct("Dynamic")
60            .field("base_data_len", &self.base_data.len())
61            .field("insert_buffer_len", &self.insert_buffer.len())
62            .field("delete_buffer_len", &self.delete_buffer.len())
63            .field("epsilon", &self.epsilon)
64            .field("epsilon_recursive", &self.epsilon_recursive)
65            .finish()
66    }
67}
68
69impl<T: Indexable + Ord + Copy> Dynamic<T>
70where
71    T::Key: Ord,
72{
73    /// Create a new empty dynamic index.
74    pub fn new(epsilon: usize, epsilon_recursive: usize) -> Self {
75        Self {
76            base_index: None,
77            base_data: Vec::new(),
78            delete_buffer: BTreeSet::new(),
79            epsilon: epsilon.max(1),
80            epsilon_recursive: epsilon_recursive.max(1),
81            insert_buffer: BTreeSet::new(),
82            rebuild_threshold: 1024,
83        }
84    }
85
86    /// Create a dynamic index from pre-sorted data.
87    pub fn from_sorted(
88        data: Vec<T>,
89        epsilon: usize,
90        epsilon_recursive: usize,
91    ) -> Result<Self, Error> {
92        let epsilon = epsilon.max(1);
93        let epsilon_recursive = epsilon_recursive.max(1);
94
95        if data.is_empty() {
96            return Ok(Self::new(epsilon, epsilon_recursive));
97        }
98
99        let base_index = Static::new(&data, epsilon, epsilon_recursive)?;
100        let rebuild_threshold = (data.len() / 10).max(1024);
101
102        Ok(Self {
103            base_index: Some(base_index),
104            base_data: data,
105            delete_buffer: BTreeSet::new(),
106            epsilon,
107            epsilon_recursive,
108            insert_buffer: BTreeSet::new(),
109            rebuild_threshold,
110        })
111    }
112
113    /// Set the threshold for automatic rebuilding.
114    pub fn with_rebuild_threshold(mut self, threshold: usize) -> Self {
115        self.rebuild_threshold = threshold.max(1);
116        self
117    }
118
119    /// Insert a value into the index.
120    pub fn insert(&mut self, value: T) {
121        self.insert_buffer.insert(value);
122        self.maybe_rebuild();
123    }
124
125    /// Remove a value from the index.
126    pub fn remove(&mut self, value: &T) -> bool {
127        if self.insert_buffer.remove(value) {
128            return true;
129        }
130
131        if self
132            .base_index
133            .as_ref()
134            .is_some_and(|idx| idx.contains(&self.base_data, value))
135        {
136            self.delete_buffer.insert(*value);
137            self.maybe_rebuild();
138            return true;
139        }
140
141        false
142    }
143
144    /// Check if a value exists in the index.
145    pub fn contains(&self, value: &T) -> bool {
146        if self.insert_buffer.contains(value) {
147            return true;
148        }
149
150        if self.delete_buffer.contains(value) {
151            return false;
152        }
153
154        (self.base_index.as_ref()).is_some_and(|idx| idx.contains(&self.base_data, value))
155    }
156
157    /// Find the smallest value >= the given value.
158    pub fn lower_bound(&self, value: &T) -> Option<T> {
159        let base_result = self.base_index.as_ref().and_then(|idx| {
160            let pos = idx.lower_bound(&self.base_data, value);
161            if pos < self.base_data.len() {
162                let v = self.base_data[pos];
163                if !self.delete_buffer.contains(&v) {
164                    return Some(v);
165                }
166                for i in (pos + 1)..self.base_data.len() {
167                    let v = self.base_data[i];
168                    if !self.delete_buffer.contains(&v) {
169                        return Some(v);
170                    }
171                }
172            }
173            None
174        });
175
176        let buffer_result = self.insert_buffer.range(value..).next().copied();
177
178        match (base_result, buffer_result) {
179            (Some(a), Some(b)) => Some(a.min(b)),
180            (Some(a), None) => Some(a),
181            (None, Some(b)) => Some(b),
182            (None, None) => None,
183        }
184    }
185
186    /// Get the number of elements in the index.
187    pub fn len(&self) -> usize {
188        let base_len = self.base_data.len();
189        let inserts = self.insert_buffer.len();
190        let deletes = self.delete_buffer.len();
191        base_len + inserts - deletes
192    }
193
194    pub fn is_empty(&self) -> bool {
195        self.len() == 0
196    }
197
198    /// Get the number of pending operations in buffers.
199    pub fn pending_operations(&self) -> usize {
200        self.insert_buffer.len() + self.delete_buffer.len()
201    }
202
203    fn maybe_rebuild(&mut self) {
204        if self.pending_operations() < self.rebuild_threshold {
205            return;
206        }
207
208        self.force_rebuild();
209    }
210
211    /// Force an immediate rebuild of the index.
212    pub fn force_rebuild(&mut self) {
213        let mut all_data: Vec<T> = self
214            .base_data
215            .iter()
216            .copied()
217            .filter(|v| !self.delete_buffer.contains(v))
218            .collect();
219
220        all_data.extend(self.insert_buffer.iter().copied());
221        all_data.sort();
222
223        if all_data.is_empty() {
224            self.base_data = Vec::new();
225            self.base_index = None;
226        } else {
227            match Static::new(&all_data, self.epsilon, self.epsilon_recursive) {
228                Ok(new_index) => {
229                    self.base_data = all_data;
230                    self.base_index = Some(new_index);
231                }
232                Err(_) => {
233                    self.base_data = all_data;
234                    self.base_index = None;
235                }
236            }
237        }
238
239        self.insert_buffer.clear();
240        self.delete_buffer.clear();
241    }
242
243    /// Iterate over all values in sorted order.
244    pub fn iter(&self) -> impl Iterator<Item = T> + '_ {
245        let base_iter = self
246            .base_data
247            .iter()
248            .copied()
249            .filter(|v| !self.delete_buffer.contains(v));
250
251        let buffer_iter = self.insert_buffer.iter().copied();
252
253        MergedIterator::new(base_iter, buffer_iter)
254    }
255
256    /// Iterate over values in the given range.
257    pub fn range<'a, R>(&'a self, range: R) -> impl Iterator<Item = T> + 'a
258    where
259        R: RangeBounds<T> + Clone + 'a,
260    {
261        let range_clone = range.clone();
262        let base_iter = self
263            .base_data
264            .iter()
265            .copied()
266            .filter(|v| !self.delete_buffer.contains(v))
267            .filter(move |v| range_clone.contains(v));
268
269        let buffer_iter = self
270            .insert_buffer
271            .range((range.start_bound().cloned(), range.end_bound().cloned()))
272            .copied();
273
274        MergedIterator::new(base_iter, buffer_iter)
275    }
276
277    /// Approximate memory usage in bytes.
278    pub fn size_in_bytes(&self) -> usize {
279        let base_size =
280            core::mem::size_of::<Self>() + self.base_data.capacity() * core::mem::size_of::<T>();
281
282        let index_size = self
283            .base_index
284            .as_ref()
285            .map_or(0, |idx| idx.size_in_bytes());
286
287        let buffer_size =
288            (self.insert_buffer.len() + self.delete_buffer.len()) * core::mem::size_of::<T>() * 3;
289
290        base_size + index_size + buffer_size
291    }
292}
293
294struct MergedIterator<I1, I2, T>
295where
296    I1: Iterator<Item = T>,
297    I2: Iterator<Item = T>,
298    T: Ord,
299{
300    iter1: core::iter::Peekable<I1>,
301    iter2: core::iter::Peekable<I2>,
302}
303
304impl<I1, I2, T> MergedIterator<I1, I2, T>
305where
306    I1: Iterator<Item = T>,
307    I2: Iterator<Item = T>,
308    T: Ord,
309{
310    fn new(iter1: I1, iter2: I2) -> Self {
311        Self {
312            iter1: iter1.peekable(),
313            iter2: iter2.peekable(),
314        }
315    }
316}
317
318impl<I1, I2, T> Iterator for MergedIterator<I1, I2, T>
319where
320    I1: Iterator<Item = T>,
321    I2: Iterator<Item = T>,
322    T: Ord + Copy,
323{
324    type Item = T;
325
326    fn next(&mut self) -> Option<Self::Item> {
327        match (self.iter1.peek(), self.iter2.peek()) {
328            (Some(&a), Some(&b)) => {
329                if a < b {
330                    self.iter1.next()
331                } else if a > b {
332                    self.iter2.next()
333                } else {
334                    self.iter2.next();
335                    self.iter1.next()
336                }
337            }
338            (Some(_), None) => self.iter1.next(),
339            (None, Some(_)) => self.iter2.next(),
340            (None, None) => None,
341        }
342    }
343}
344
345impl<T: Indexable + Ord + Copy> core::iter::Extend<T> for Dynamic<T>
346where
347    T::Key: Ord,
348{
349    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
350        self.insert_buffer.extend(iter);
351        self.maybe_rebuild();
352    }
353}
354
355#[cfg(test)]
356mod tests {
357    use super::*;
358
359    #[test]
360    fn test_dynamic_empty() {
361        let index: Dynamic<u64> = Dynamic::new(16, 4);
362        assert!(index.is_empty());
363        assert_eq!(index.len(), 0);
364    }
365
366    #[test]
367    fn test_dynamic_insert() {
368        let mut index: Dynamic<u64> = Dynamic::new(16, 4);
369
370        index.insert(5);
371        index.insert(3);
372        index.insert(7);
373
374        assert!(index.contains(&3));
375        assert!(index.contains(&5));
376        assert!(index.contains(&7));
377        assert!(!index.contains(&4));
378    }
379
380    #[test]
381    fn test_dynamic_extend() {
382        let mut index: Dynamic<u64> = Dynamic::new(16, 4);
383
384        index.extend(5..7);
385
386        assert!(index.contains(&5));
387        assert!(index.contains(&6));
388        assert!(!index.contains(&7));
389    }
390
391    #[test]
392    fn test_dynamic_remove() {
393        let data: Vec<u64> = (0..100).collect();
394        let mut index = Dynamic::from_sorted(data, 16, 4).unwrap();
395
396        assert!(index.contains(&50));
397        assert!(index.remove(&50));
398        assert!(!index.contains(&50));
399    }
400
401    #[test]
402    fn test_dynamic_rebuild() {
403        let mut index: Dynamic<u64> = Dynamic::new(16, 4).with_rebuild_threshold(10);
404
405        for i in 0..20 {
406            index.insert(i);
407        }
408
409        assert_eq!(index.pending_operations(), 0);
410
411        for i in 0..20 {
412            assert!(index.contains(&i), "Missing value {}", i);
413        }
414    }
415
416    #[test]
417    fn test_dynamic_iter() {
418        let mut index: Dynamic<u64> = Dynamic::new(16, 4);
419
420        index.insert(3);
421        index.insert(1);
422        index.insert(2);
423
424        let collected: Vec<u64> = index.iter().collect();
425        assert_eq!(collected, vec![1, 2, 3]);
426    }
427}