pgm_extra/index/external/
one_level.rs1use alloc::vec::Vec;
7use core::ops::RangeBounds;
8
9use crate::error::Error;
10
11use crate::index::key::Indexable;
12use crate::index::model::build_segments;
13use crate::index::segment::Segment;
14
15use crate::util::ApproxPos;
16use crate::util::range::range_to_indices;
17use crate::util::search::{pgm_add_eps, pgm_sub_eps};
18
19#[derive(Clone, Debug)]
37#[cfg_attr(
38 feature = "rkyv",
39 derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
40)]
41#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
42#[cfg_attr(
43 feature = "serde",
44 serde(bound = "T::Key: serde::Serialize + serde::de::DeserializeOwned")
45)]
46pub struct OneLevel<T: Indexable> {
47 epsilon: usize,
48 len: usize,
49 segments: Vec<Segment<T::Key>>,
50}
51
52impl<T: Indexable> OneLevel<T>
53where
54 T::Key: Ord,
55{
56 pub fn new(data: &[T], epsilon: usize) -> Result<Self, Error> {
58 if data.is_empty() {
59 return Err(Error::EmptyInput);
60 }
61 if epsilon == 0 {
62 return Err(Error::InvalidEpsilon);
63 }
64
65 debug_assert!(
66 data.windows(2)
67 .all(|w| w[0].index_key() <= w[1].index_key()),
68 "data must be sorted by index key"
69 );
70
71 let keys: Vec<T::Key> = data.iter().map(|v| v.index_key()).collect();
72 let segments = build_segments(&keys, epsilon);
73
74 Ok(Self {
75 segments,
76 epsilon,
77 len: keys.len(),
78 })
79 }
80
81 #[inline]
82 fn find_segment(&self, key: &T::Key) -> usize {
83 let pos = self
84 .segments
85 .partition_point(|s| s.key <= *key)
86 .saturating_sub(1);
87 pos.min(self.segments.len().saturating_sub(1))
88 }
89
90 #[inline]
92 pub fn search(&self, value: &T) -> ApproxPos {
93 let key = value.index_key();
94 self.search_by_key(&key)
95 }
96
97 #[inline]
99 pub fn search_by_key(&self, key: &T::Key) -> ApproxPos {
100 let seg_idx = self.find_segment(key);
101 let segment = &self.segments[seg_idx];
102
103 let pos = segment.predict(*key).min(self.len.saturating_sub(1));
104 let lo = pgm_sub_eps(pos, self.epsilon);
105 let hi = pgm_add_eps(pos, self.epsilon, self.len);
106
107 ApproxPos::new(pos, lo, hi)
108 }
109
110 #[inline]
112 pub fn lower_bound(&self, data: &[T], value: &T) -> usize
113 where
114 T: Ord,
115 {
116 let key = value.index_key();
117 let approx = self.search_by_key(&key);
118 let len = data.len();
119
120 if len == 0 {
121 return 0;
122 }
123
124 let pos = approx.pos.min(len - 1);
125 let k = &data[pos];
126
127 if *k == *value {
128 let mut i = pos;
129 while i > 0 && data[i - 1] == *value {
130 i -= 1;
131 }
132 return i;
133 }
134
135 if *k < *value {
136 if pos + 1 < len && data[pos + 1] >= *value {
137 return pos + 1;
138 }
139 } else if pos > 0 && data[pos - 1] < *value {
140 return pos;
141 }
142
143 let lo = approx.lo.saturating_sub(self.epsilon);
146 let hi = (approx.hi + self.epsilon).min(len);
147 let slice = &data[lo..hi];
148 lo + slice.partition_point(|x| x < value)
149 }
150
151 #[inline]
153 pub fn upper_bound(&self, data: &[T], value: &T) -> usize
154 where
155 T: Ord,
156 {
157 let idx = self.lower_bound(data, value);
158 let mut i = idx;
159 while i < data.len() && data[i] == *value {
160 i += 1;
161 }
162 i
163 }
164
165 #[inline]
167 pub fn contains(&self, data: &[T], value: &T) -> bool
168 where
169 T: Ord,
170 {
171 let idx = self.lower_bound(data, value);
172 idx < data.len() && data[idx] == *value
173 }
174
175 #[inline]
176 pub fn len(&self) -> usize {
177 self.len
178 }
179
180 #[inline]
181 pub fn is_empty(&self) -> bool {
182 self.len == 0
183 }
184
185 #[inline]
186 pub fn segments_count(&self) -> usize {
187 self.segments.len()
188 }
189
190 #[inline]
191 pub fn epsilon(&self) -> usize {
192 self.epsilon
193 }
194
195 pub fn size_in_bytes(&self) -> usize {
196 core::mem::size_of::<Self>()
197 + self.segments.capacity() * core::mem::size_of::<Segment<T::Key>>()
198 }
199
200 #[inline]
202 pub fn range_indices<R>(&self, data: &[T], range: R) -> (usize, usize)
203 where
204 T: Ord,
205 R: RangeBounds<T>,
206 {
207 range_to_indices(
208 range,
209 data.len(),
210 |v| self.lower_bound(data, v),
211 |v| self.upper_bound(data, v),
212 )
213 }
214
215 #[inline]
217 pub fn range<'a, R>(&self, data: &'a [T], range: R) -> impl DoubleEndedIterator<Item = &'a T>
218 where
219 T: Ord,
220 R: RangeBounds<T>,
221 {
222 let (start, end) = self.range_indices(data, range);
223 data[start..end].iter()
224 }
225}
226
227impl<T: Indexable> crate::index::External<T> for OneLevel<T>
228where
229 T::Key: Ord,
230{
231 #[inline]
232 fn search(&self, value: &T) -> ApproxPos {
233 self.search(value)
234 }
235
236 #[inline]
237 fn lower_bound(&self, data: &[T], value: &T) -> usize
238 where
239 T: Ord,
240 {
241 self.lower_bound(data, value)
242 }
243
244 #[inline]
245 fn upper_bound(&self, data: &[T], value: &T) -> usize
246 where
247 T: Ord,
248 {
249 self.upper_bound(data, value)
250 }
251
252 #[inline]
253 fn contains(&self, data: &[T], value: &T) -> bool
254 where
255 T: Ord,
256 {
257 self.contains(data, value)
258 }
259
260 #[inline]
261 fn len(&self) -> usize {
262 self.len()
263 }
264
265 #[inline]
266 fn segments_count(&self) -> usize {
267 self.segments_count()
268 }
269
270 #[inline]
271 fn epsilon(&self) -> usize {
272 self.epsilon()
273 }
274
275 #[inline]
276 fn size_in_bytes(&self) -> usize {
277 self.size_in_bytes()
278 }
279}
280
281#[cfg(test)]
282mod tests {
283 use super::*;
284 use alloc::vec;
285 use alloc::vec::Vec;
286
287 #[test]
288 fn test_one_level_basic() {
289 let keys: Vec<u64> = (0..1000).collect();
290 let index = OneLevel::new(&keys, 8).unwrap();
291
292 assert_eq!(index.len(), 1000);
293 assert!(!index.is_empty());
294 }
295
296 #[test]
297 fn test_one_level_search() {
298 let keys: Vec<u64> = (0..1000).collect();
299 let index = OneLevel::new(&keys, 8).unwrap();
300
301 for &key in &[0u64, 100, 500, 999] {
302 let idx = index.lower_bound(&keys, &key);
303 assert_eq!(idx, key as usize);
304 }
305 }
306
307 #[test]
308 fn test_one_level_contains() {
309 let keys: Vec<u64> = (0..100).map(|i| i * 2).collect();
310 let index = OneLevel::new(&keys, 8).unwrap();
311
312 assert!(index.contains(&keys, &0));
313 assert!(index.contains(&keys, &50));
314 assert!(!index.contains(&keys, &1));
315 assert!(!index.contains(&keys, &51));
316 }
317
318 #[test]
319 fn test_one_level_empty_error() {
320 let keys: Vec<u64> = vec![];
321 let result = OneLevel::new(&keys, 8);
322 assert!(matches!(result, Err(Error::EmptyInput)));
323 }
324}