pgm_extra/
lib.rs

1//! # PGM-Extra
2//!
3//! A Rust implementation of the PGM-index, a learned index structure that uses
4//! piecewise linear models to approximate the cumulative distribution function
5//! of sorted data for fast lookups.
6//!
7//! ## Quick Start
8//!
9//! For most use cases, use [`Set`] or [`Map`] which own their data:
10//!
11//! ```rust
12//! use pgm_extra::{Set, Map};
13//!
14//! // Set (like BTreeSet)
15//! let set: Set<u64> = (0..10000).collect();
16//! assert!(set.contains(&5000));
17//!
18//! // Map (like BTreeMap)
19//! let map: Map<u64, &str> = vec![(1, "one"), (2, "two")].into_iter().collect();
20//! assert_eq!(map.get(&1), Some(&"one"));
21//!
22//! // Works with strings too!
23//! let words: Set<&str> = vec!["apple", "banana", "cherry"].into_iter().collect();
24//! assert!(words.contains(&"banana"));
25//! ```
26//!
27//! ## Index Types
28//!
29//! ### Owned collections (recommended for most uses)
30//!
31//! - [`Set`]: BTreeSet-like set optimized for read-heavy workloads
32//! - [`Map`]: BTreeMap-like map optimized for read-heavy workloads
33//!
34//! ### External-keys indices (for advanced use cases)
35//!
36//! - [`Static`]: Multi-level recursive index (also available as [`index::external::Static`])
37//! - [`OneLevel`]: Single-level index for smaller datasets
38//! - [`Cached`]: Multi-level with hot-key cache for repeated lookups
39//! - [`Dynamic`]: Mutable index with insert/delete (requires `std` feature)
40//!
41//! ## Features
42//!
43//! - `std` (default): Enables `Dynamic` index and other std-only features
44//! - `parallel`: Enables parallel index construction
45//! - `simd`: Enables SIMD-accelerated search routines
46//! - `serde`: Enables serialization/deserialization
47//!
48//! ## Performance
49//!
50//! The PGM-index provides O(log n / log epsilon) query time with significantly
51//! smaller space overhead compared to traditional B-trees. Construction is O(n).
52
53#![cfg_attr(not(feature = "std"), no_std)]
54extern crate alloc;
55
56pub mod collections;
57pub mod error;
58pub mod index;
59pub mod util;
60
61// Primary exports
62pub use collections::map::Map;
63pub use collections::set::Set;
64
65// Re-export commonly used types
66pub use error::Error;
67
68// Re-export index types at crate root for convenience
69pub use index::external::Cached as CachedIndex;
70pub use index::external::OneLevel as OneLevelIndex;
71pub use index::external::Static as StaticIndex;
72
73// Re-export index types at crate root for convenience
74pub use index::external::Cached;
75pub use index::external::OneLevel;
76pub use index::external::Static;
77#[cfg(feature = "std")]
78pub use index::owned::Dynamic;
79#[cfg(feature = "std")]
80pub use index::owned::Dynamic as DynamicIndex;
81
82#[cfg(test)]
83mod tests {
84    use super::*;
85    use alloc::vec::Vec;
86
87    #[test]
88    fn test_integration_basic() {
89        let data: Vec<u64> = (0..10000).collect();
90        let index = index::Builder::new()
91            .epsilon(64)
92            .epsilon_recursive(4)
93            .build(&data)
94            .unwrap();
95
96        for i in (0..10000).step_by(100) {
97            let pos = index.lower_bound(&data, &i);
98            assert_eq!(pos, i as usize);
99        }
100    }
101
102    #[test]
103    fn test_integration_signed() {
104        let data: Vec<i64> = (-5000..5000).collect();
105        let index = index::external::Static::new(&data, 64, 4).unwrap();
106
107        for i in (-5000i64..5000).step_by(100) {
108            let pos = index.lower_bound(&data, &i);
109            let expected = (i + 5000) as usize;
110            assert_eq!(pos, expected, "Failed for key {}", i);
111        }
112
113        assert!(index.contains(&data, &-5000));
114        assert!(!index.contains(&data, &5000));
115    }
116
117    #[test]
118    fn test_integration_sparse() {
119        let data: Vec<u64> = (0..1000).map(|i| i * i).collect();
120        let index = index::external::Static::new(&data, 32, 4).unwrap();
121
122        for (i, &key) in data.iter().enumerate() {
123            let pos = index.lower_bound(&data, &key);
124            assert_eq!(pos, i, "Failed for key {} at index {}", key, i);
125        }
126    }
127
128    #[test]
129    fn test_missing_keys() {
130        let data: Vec<u64> = (0..100).map(|i| i * 2).collect();
131        let index = index::external::Static::new(&data, 8, 4).unwrap();
132
133        let pos = index.lower_bound(&data, &1);
134        assert_eq!(pos, 1);
135
136        let pos = index.lower_bound(&data, &199);
137        assert_eq!(pos, 100);
138    }
139
140    #[test]
141    fn test_set_api() {
142        let set: Set<u64> = (0..1000).collect();
143        assert!(set.contains(&500));
144        assert!(!set.contains(&1001));
145    }
146
147    #[test]
148    fn test_map_api() {
149        let map: Map<u64, i32> = (0..100).map(|i| (i, i as i32 * 2)).collect();
150        assert_eq!(map.get(&50), Some(&100));
151    }
152}