pgm_extra/
lib.rs

1//! # PGM-Extra
2//!
3//! A Rust implementation of the PGM-index, a learned index structure that uses
4//! piecewise linear models to approximate the cumulative distribution function
5//! of sorted data for fast lookups.
6//!
7//! ## Quick Start
8//!
9//! For most use cases, use [`Set`] or [`Map`] which own their data:
10//!
11//! ```rust
12//! use pgm_extra::{Set, Map};
13//!
14//! // Set (like BTreeSet)
15//! let set: Set<u64> = (0..10000).collect();
16//! assert!(set.contains(&5000));
17//!
18//! // Map (like BTreeMap)
19//! let map: Map<u64, &str> = vec![(1, "one"), (2, "two")].into_iter().collect();
20//! assert_eq!(map.get(&1), Some(&"one"));
21//!
22//! // Works with strings too!
23//! let words: Set<&str> = vec!["apple", "banana", "cherry"].into_iter().collect();
24//! assert!(words.contains(&"banana"));
25//! ```
26//!
27//! ## Index Types
28//!
29//! ### Owned collections (recommended for most uses)
30//!
31//! - [`Set`]: BTreeSet-like set optimized for read-heavy workloads
32//! - [`Map`]: BTreeMap-like map optimized for read-heavy workloads
33//!
34//! ### External-keys indices (for advanced use cases)
35//!
36//! - [`Static`]: Multi-level recursive index (also available as [`index::external::Static`])
37//! - [`OneLevel`]: Single-level index for smaller datasets
38//! - [`Cached`]: Multi-level with hot-key cache for repeated lookups
39//! - [`Dynamic`]: Mutable index with insert/delete (requires `std` feature)
40//!
41//! ## Features
42//!
43//! - `std` (default): Enables `Dynamic` index and other std-only features
44//! - `parallel`: Enables parallel index construction
45//! - `simd`: Enables SIMD-accelerated search routines
46//! - `serde`: Enables serialization/deserialization with serde
47//! - `rkyv`: Enables zero-copy serialization/deserialization with rkyv
48//!
49//! ## Performance
50//!
51//! The PGM-index provides O(log n / log epsilon) query time with significantly
52//! smaller space overhead compared to traditional B-trees. Construction is O(n).
53
54#![cfg_attr(not(feature = "std"), no_std)]
55extern crate alloc;
56
57pub mod collections;
58pub mod error;
59pub mod index;
60pub mod util;
61
62// Primary exports
63pub use collections::map::Map;
64pub use collections::set::Set;
65
66// Re-export commonly used types
67pub use error::Error;
68
69// Re-export index types at crate root for convenience
70pub use index::external::Cached as CachedIndex;
71pub use index::external::OneLevel as OneLevelIndex;
72pub use index::external::Static as StaticIndex;
73
74// Re-export index types at crate root for convenience
75pub use index::external::Cached;
76pub use index::external::OneLevel;
77pub use index::external::Static;
78#[cfg(feature = "std")]
79pub use index::owned::Dynamic;
80#[cfg(feature = "std")]
81pub use index::owned::Dynamic as DynamicIndex;
82
83#[cfg(test)]
84mod tests {
85    use super::*;
86    use alloc::vec::Vec;
87
88    #[test]
89    fn test_integration_basic() {
90        let data: Vec<u64> = (0..10000).collect();
91        let index = index::Builder::new()
92            .epsilon(64)
93            .epsilon_recursive(4)
94            .build(&data)
95            .unwrap();
96
97        for i in (0..10000).step_by(100) {
98            let pos = index.lower_bound(&data, &i);
99            assert_eq!(pos, i as usize);
100        }
101    }
102
103    #[test]
104    fn test_integration_signed() {
105        let data: Vec<i64> = (-5000..5000).collect();
106        let index = index::external::Static::new(&data, 64, 4).unwrap();
107
108        for i in (-5000i64..5000).step_by(100) {
109            let pos = index.lower_bound(&data, &i);
110            let expected = (i + 5000) as usize;
111            assert_eq!(pos, expected, "Failed for key {}", i);
112        }
113
114        assert!(index.contains(&data, &-5000));
115        assert!(!index.contains(&data, &5000));
116    }
117
118    #[test]
119    fn test_integration_sparse() {
120        let data: Vec<u64> = (0..1000).map(|i| i * i).collect();
121        let index = index::external::Static::new(&data, 32, 4).unwrap();
122
123        for (i, &key) in data.iter().enumerate() {
124            let pos = index.lower_bound(&data, &key);
125            assert_eq!(pos, i, "Failed for key {} at index {}", key, i);
126        }
127    }
128
129    #[test]
130    fn test_missing_keys() {
131        let data: Vec<u64> = (0..100).map(|i| i * 2).collect();
132        let index = index::external::Static::new(&data, 8, 4).unwrap();
133
134        let pos = index.lower_bound(&data, &1);
135        assert_eq!(pos, 1);
136
137        let pos = index.lower_bound(&data, &199);
138        assert_eq!(pos, 100);
139    }
140
141    #[test]
142    fn test_set_api() {
143        let set: Set<u64> = (0..1000).collect();
144        assert!(set.contains(&500));
145        assert!(!set.contains(&1001));
146    }
147
148    #[test]
149    fn test_map_api() {
150        let map: Map<u64, i32> = (0..100).map(|i| (i, i as i32 * 2)).collect();
151        assert_eq!(map.get(&50), Some(&100));
152    }
153}