pgm_extra/index/external/
one_level.rs1use alloc::vec::Vec;
7use core::ops::RangeBounds;
8
9use crate::error::Error;
10
11use crate::index::key::Indexable;
12use crate::index::model::build_segments;
13use crate::index::segment::Segment;
14
15use crate::util::ApproxPos;
16use crate::util::range::range_to_indices;
17use crate::util::search::{pgm_add_eps, pgm_sub_eps};
18
19#[derive(Clone, Debug)]
37#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38#[cfg_attr(
39 feature = "serde",
40 serde(bound = "T::Key: serde::Serialize + serde::de::DeserializeOwned")
41)]
42pub struct OneLevel<T: Indexable> {
43 epsilon: usize,
44 len: usize,
45 segments: Vec<Segment<T::Key>>,
46}
47
48impl<T: Indexable> OneLevel<T>
49where
50 T::Key: Ord,
51{
52 pub fn new(data: &[T], epsilon: usize) -> Result<Self, Error> {
54 if data.is_empty() {
55 return Err(Error::EmptyInput);
56 }
57 if epsilon == 0 {
58 return Err(Error::InvalidEpsilon);
59 }
60
61 debug_assert!(
62 data.windows(2)
63 .all(|w| w[0].index_key() <= w[1].index_key()),
64 "data must be sorted by index key"
65 );
66
67 let keys: Vec<T::Key> = data.iter().map(|v| v.index_key()).collect();
68 let segments = build_segments(&keys, epsilon);
69
70 Ok(Self {
71 segments,
72 epsilon,
73 len: keys.len(),
74 })
75 }
76
77 #[inline]
78 fn find_segment(&self, key: &T::Key) -> usize {
79 let pos = self
80 .segments
81 .partition_point(|s| s.key <= *key)
82 .saturating_sub(1);
83 pos.min(self.segments.len().saturating_sub(1))
84 }
85
86 #[inline]
88 pub fn search(&self, value: &T) -> ApproxPos {
89 let key = value.index_key();
90 self.search_by_key(&key)
91 }
92
93 #[inline]
95 pub fn search_by_key(&self, key: &T::Key) -> ApproxPos {
96 let seg_idx = self.find_segment(key);
97 let segment = &self.segments[seg_idx];
98
99 let pos = segment.predict(*key).min(self.len.saturating_sub(1));
100 let lo = pgm_sub_eps(pos, self.epsilon);
101 let hi = pgm_add_eps(pos, self.epsilon, self.len);
102
103 ApproxPos::new(pos, lo, hi)
104 }
105
106 #[inline]
108 pub fn lower_bound(&self, data: &[T], value: &T) -> usize
109 where
110 T: Ord,
111 {
112 let key = value.index_key();
113 let approx = self.search_by_key(&key);
114 let len = data.len();
115
116 if len == 0 {
117 return 0;
118 }
119
120 let pos = approx.pos.min(len - 1);
121 let k = &data[pos];
122
123 if *k == *value {
124 let mut i = pos;
125 while i > 0 && data[i - 1] == *value {
126 i -= 1;
127 }
128 return i;
129 }
130
131 if *k < *value {
132 if pos + 1 < len && data[pos + 1] >= *value {
133 return pos + 1;
134 }
135 } else if pos > 0 && data[pos - 1] < *value {
136 return pos;
137 }
138
139 let lo = approx.lo;
140 let hi = approx.hi.min(len);
141 let slice = &data[lo..hi];
142 lo + slice.partition_point(|x| x < value)
143 }
144
145 #[inline]
147 pub fn upper_bound(&self, data: &[T], value: &T) -> usize
148 where
149 T: Ord,
150 {
151 let idx = self.lower_bound(data, value);
152 let mut i = idx;
153 while i < data.len() && data[i] == *value {
154 i += 1;
155 }
156 i
157 }
158
159 #[inline]
161 pub fn contains(&self, data: &[T], value: &T) -> bool
162 where
163 T: Ord,
164 {
165 let idx = self.lower_bound(data, value);
166 idx < data.len() && data[idx] == *value
167 }
168
169 #[inline]
170 pub fn len(&self) -> usize {
171 self.len
172 }
173
174 #[inline]
175 pub fn is_empty(&self) -> bool {
176 self.len == 0
177 }
178
179 #[inline]
180 pub fn segments_count(&self) -> usize {
181 self.segments.len()
182 }
183
184 #[inline]
185 pub fn epsilon(&self) -> usize {
186 self.epsilon
187 }
188
189 pub fn size_in_bytes(&self) -> usize {
190 core::mem::size_of::<Self>()
191 + self.segments.capacity() * core::mem::size_of::<Segment<T::Key>>()
192 }
193
194 #[inline]
196 pub fn range_indices<R>(&self, data: &[T], range: R) -> (usize, usize)
197 where
198 T: Ord,
199 R: RangeBounds<T>,
200 {
201 range_to_indices(
202 range,
203 data.len(),
204 |v| self.lower_bound(data, v),
205 |v| self.upper_bound(data, v),
206 )
207 }
208
209 #[inline]
211 pub fn range<'a, R>(&self, data: &'a [T], range: R) -> impl DoubleEndedIterator<Item = &'a T>
212 where
213 T: Ord,
214 R: RangeBounds<T>,
215 {
216 let (start, end) = self.range_indices(data, range);
217 data[start..end].iter()
218 }
219}
220
221impl<T: Indexable> crate::index::External<T> for OneLevel<T>
222where
223 T::Key: Ord,
224{
225 #[inline]
226 fn search(&self, value: &T) -> ApproxPos {
227 self.search(value)
228 }
229
230 #[inline]
231 fn lower_bound(&self, data: &[T], value: &T) -> usize
232 where
233 T: Ord,
234 {
235 self.lower_bound(data, value)
236 }
237
238 #[inline]
239 fn upper_bound(&self, data: &[T], value: &T) -> usize
240 where
241 T: Ord,
242 {
243 self.upper_bound(data, value)
244 }
245
246 #[inline]
247 fn contains(&self, data: &[T], value: &T) -> bool
248 where
249 T: Ord,
250 {
251 self.contains(data, value)
252 }
253
254 #[inline]
255 fn len(&self) -> usize {
256 self.len()
257 }
258
259 #[inline]
260 fn segments_count(&self) -> usize {
261 self.segments_count()
262 }
263
264 #[inline]
265 fn epsilon(&self) -> usize {
266 self.epsilon()
267 }
268
269 #[inline]
270 fn size_in_bytes(&self) -> usize {
271 self.size_in_bytes()
272 }
273}
274
275#[cfg(test)]
276mod tests {
277 use super::*;
278 use alloc::vec;
279 use alloc::vec::Vec;
280
281 #[test]
282 fn test_one_level_basic() {
283 let keys: Vec<u64> = (0..1000).collect();
284 let index = OneLevel::new(&keys, 8).unwrap();
285
286 assert_eq!(index.len(), 1000);
287 assert!(!index.is_empty());
288 }
289
290 #[test]
291 fn test_one_level_search() {
292 let keys: Vec<u64> = (0..1000).collect();
293 let index = OneLevel::new(&keys, 8).unwrap();
294
295 for &key in &[0u64, 100, 500, 999] {
296 let idx = index.lower_bound(&keys, &key);
297 assert_eq!(idx, key as usize);
298 }
299 }
300
301 #[test]
302 fn test_one_level_contains() {
303 let keys: Vec<u64> = (0..100).map(|i| i * 2).collect();
304 let index = OneLevel::new(&keys, 8).unwrap();
305
306 assert!(index.contains(&keys, &0));
307 assert!(index.contains(&keys, &50));
308 assert!(!index.contains(&keys, &1));
309 assert!(!index.contains(&keys, &51));
310 }
311
312 #[test]
313 fn test_one_level_empty_error() {
314 let keys: Vec<u64> = vec![];
315 let result = OneLevel::new(&keys, 8);
316 assert!(matches!(result, Err(Error::EmptyInput)));
317 }
318}