pgm_extra/index/external/mod.rs
1//! External-keys PGM indices.
2//!
3//! These indices store only the learned model metadata. The actual data is stored
4//! externally and passed to query methods as a slice.
5//!
6//! Use these when:
7//! - You want to share the data between multiple indices
8//! - You want fine control over data storage
9//! - You need to update the data without rebuilding the index
10//!
11//! # Index Types
12//!
13//! - [`Static`]: Multi-level recursive index for best query performance
14//! - [`OneLevel`]: Single-level index with lower memory overhead
15//! - [`Cached`]: Multi-level index with hot-key cache
16//!
17//! # Example
18//!
19//! ```
20//! use pgm_extra::index::external::{Static, OneLevel, Cached};
21//!
22//! let data: Vec<u64> = (0..10000).collect();
23//!
24//! // Multi-level for large datasets
25//! let pgm = Static::new(&data, 64, 4).unwrap();
26//! assert!(pgm.contains(&data, &5000));
27//!
28//! // Single-level for smaller datasets
29//! let one = OneLevel::new(&data, 64).unwrap();
30//! assert!(one.contains(&data, &5000));
31//!
32//! // Cached for hot-key workloads
33//! let cached = Cached::new(&data, 64, 4).unwrap();
34//! assert!(cached.contains(&data, &5000));
35//! ```
36
37mod cached;
38mod one_level;
39mod r#static;
40
41pub use cached::Cached;
42pub use one_level::OneLevel;
43pub use r#static::Static;
44
45use crate::index::key::Indexable;
46use crate::util::ApproxPos;
47
48/// Trait for external-keys PGM indices that operate over a sorted slice.
49///
50/// This trait provides a common interface for [`crate::index::external::Static`],
51/// [`crate::index::external::OneLevel`], and [`crate::index::external::Cached`],
52/// allowing generic code to work with any of them.
53///
54/// # Example
55///
56/// ```
57/// use pgm_extra::index::external;
58/// use pgm_extra::index::External;
59/// use pgm_extra::index::key::Indexable;
60///
61/// fn search_any<T, I>(index: &I, data: &[T], value: &T) -> bool
62/// where
63/// T: Indexable + Ord,
64/// T::Key: Ord,
65/// I: External<T>,
66/// {
67/// index.contains(data, value)
68/// }
69///
70/// let data: Vec<u64> = (0..1000).collect();
71/// let pgm = external::Static::new(&data, 64, 4).unwrap();
72/// let one_level = external::OneLevel::new(&data, 64).unwrap();
73///
74/// assert!(search_any(&pgm, &data, &500));
75/// assert!(search_any(&one_level, &data, &500));
76/// ```
77pub trait External<T: Indexable> {
78 /// Get an approximate position for the value.
79 fn search(&self, value: &T) -> ApproxPos;
80
81 /// Find the first position where `data[pos] >= value`.
82 fn lower_bound(&self, data: &[T], value: &T) -> usize
83 where
84 T: Ord;
85
86 /// Find the first position where `data[pos] > value`.
87 fn upper_bound(&self, data: &[T], value: &T) -> usize
88 where
89 T: Ord;
90
91 /// Check if the value exists in the sorted slice.
92 fn contains(&self, data: &[T], value: &T) -> bool
93 where
94 T: Ord;
95
96 /// Number of elements the index was built for.
97 fn len(&self) -> usize;
98
99 /// Whether the index is empty.
100 fn is_empty(&self) -> bool {
101 self.len() == 0
102 }
103
104 /// Number of segments in the index.
105 fn segments_count(&self) -> usize;
106
107 /// Epsilon value used for the bottom level.
108 fn epsilon(&self) -> usize;
109
110 /// Approximate memory usage in bytes.
111 fn size_in_bytes(&self) -> usize;
112}