pgm_extra/index/external/
mod.rs

1//! External-keys PGM indices.
2//!
3//! These indices store only the learned model metadata. The actual data is stored
4//! externally and passed to query methods as a slice.
5//!
6//! Use these when:
7//! - You want to share the data between multiple indices
8//! - You want fine control over data storage
9//! - You need to update the data without rebuilding the index
10//!
11//! # Index Types
12//!
13//! - [`Static`]: Multi-level recursive index for best query performance
14//! - [`OneLevel`]: Single-level index with lower memory overhead
15//! - [`Cached`]: Multi-level index with hot-key cache
16//!
17//! # Example
18//!
19//! ```
20//! use pgm_extra::index::external::{Static, OneLevel, Cached};
21//!
22//! let data: Vec<u64> = (0..10000).collect();
23//!
24//! // Multi-level for large datasets
25//! let pgm = Static::new(&data, 64, 4).unwrap();
26//! assert!(pgm.contains(&data, &5000));
27//!
28//! // Single-level for smaller datasets
29//! let one = OneLevel::new(&data, 64).unwrap();
30//! assert!(one.contains(&data, &5000));
31//!
32//! // Cached for hot-key workloads
33//! let cached = Cached::new(&data, 64, 4).unwrap();
34//! assert!(cached.contains(&data, &5000));
35//! ```
36
37mod cached;
38mod one_level;
39mod r#static;
40
41pub use cached::Cached;
42pub use one_level::OneLevel;
43pub use r#static::Static;
44
45use crate::index::key::Indexable;
46use crate::util::ApproxPos;
47
48/// Trait for external-keys PGM indices that operate over a sorted slice.
49///
50/// This trait provides a common interface for [`crate::index::external::Static`],
51/// [`crate::index::external::OneLevel`], and [`crate::index::external::Cached`],
52/// allowing generic code to work with any of them.
53///
54/// # Example
55///
56/// ```
57/// use pgm_extra::index::external;
58/// use pgm_extra::index::External;
59/// use pgm_extra::index::key::Indexable;
60///
61/// fn search_any<T, I>(index: &I, data: &[T], value: &T) -> bool
62/// where
63///     T: Indexable + Ord,
64///     T::Key: Ord,
65///     I: External<T>,
66/// {
67///     index.contains(data, value)
68/// }
69///
70/// let data: Vec<u64> = (0..1000).collect();
71/// let pgm = external::Static::new(&data, 64, 4).unwrap();
72/// let one_level = external::OneLevel::new(&data, 64).unwrap();
73///
74/// assert!(search_any(&pgm, &data, &500));
75/// assert!(search_any(&one_level, &data, &500));
76/// ```
77pub trait External<T: Indexable> {
78    /// Get an approximate position for the value.
79    fn search(&self, value: &T) -> ApproxPos;
80
81    /// Find the first position where `data[pos] >= value`.
82    fn lower_bound(&self, data: &[T], value: &T) -> usize
83    where
84        T: Ord;
85
86    /// Find the first position where `data[pos] > value`.
87    fn upper_bound(&self, data: &[T], value: &T) -> usize
88    where
89        T: Ord;
90
91    /// Check if the value exists in the sorted slice.
92    fn contains(&self, data: &[T], value: &T) -> bool
93    where
94        T: Ord;
95
96    /// Number of elements the index was built for.
97    fn len(&self) -> usize;
98
99    /// Whether the index is empty.
100    fn is_empty(&self) -> bool {
101        self.len() == 0
102    }
103
104    /// Number of segments in the index.
105    fn segments_count(&self) -> usize;
106
107    /// Epsilon value used for the bottom level.
108    fn epsilon(&self) -> usize;
109
110    /// Approximate memory usage in bytes.
111    fn size_in_bytes(&self) -> usize;
112}