pgm_extra/index/external/
one_level.rs1use alloc::vec::Vec;
7use core::ops::RangeBounds;
8
9use crate::error::Error;
10
11use crate::index::key::Indexable;
12use crate::index::model::build_segments;
13use crate::index::segment::Segment;
14
15use crate::util::ApproxPos;
16use crate::util::range::range_to_indices;
17use crate::util::search::{pgm_add_eps, pgm_sub_eps};
18
19#[derive(Clone, Debug)]
37#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38#[cfg_attr(
39 feature = "serde",
40 serde(bound = "T::Key: serde::Serialize + serde::de::DeserializeOwned")
41)]
42pub struct OneLevel<T: Indexable> {
43 epsilon: usize,
44 len: usize,
45 segments: Vec<Segment<T::Key>>,
46}
47
48impl<T: Indexable> OneLevel<T>
49where
50 T::Key: Ord,
51{
52 pub fn new(data: &[T], epsilon: usize) -> Result<Self, Error> {
54 if data.is_empty() {
55 return Err(Error::EmptyInput);
56 }
57 if epsilon == 0 {
58 return Err(Error::InvalidEpsilon);
59 }
60
61 debug_assert!(
62 data.windows(2)
63 .all(|w| w[0].index_key() <= w[1].index_key()),
64 "data must be sorted by index key"
65 );
66
67 let keys: Vec<T::Key> = data.iter().map(|v| v.index_key()).collect();
68 let segments = build_segments(&keys, epsilon);
69
70 Ok(Self {
71 segments,
72 epsilon,
73 len: keys.len(),
74 })
75 }
76
77 #[inline]
78 fn find_segment(&self, key: &T::Key) -> usize {
79 let pos = self
80 .segments
81 .partition_point(|s| s.key <= *key)
82 .saturating_sub(1);
83 pos.min(self.segments.len().saturating_sub(1))
84 }
85
86 #[inline]
88 pub fn search(&self, value: &T) -> ApproxPos {
89 let key = value.index_key();
90 self.search_by_key(&key)
91 }
92
93 #[inline]
95 pub fn search_by_key(&self, key: &T::Key) -> ApproxPos {
96 let seg_idx = self.find_segment(key);
97 let segment = &self.segments[seg_idx];
98
99 let pos = segment.predict(*key).min(self.len.saturating_sub(1));
100 let lo = pgm_sub_eps(pos, self.epsilon);
101 let hi = pgm_add_eps(pos, self.epsilon, self.len);
102
103 ApproxPos::new(pos, lo, hi)
104 }
105
106 #[inline]
108 pub fn lower_bound(&self, data: &[T], value: &T) -> usize
109 where
110 T: Ord,
111 {
112 let key = value.index_key();
113 let approx = self.search_by_key(&key);
114 let len = data.len();
115
116 if len == 0 {
117 return 0;
118 }
119
120 let pos = approx.pos.min(len - 1);
121 let k = &data[pos];
122
123 if *k == *value {
124 let mut i = pos;
125 while i > 0 && data[i - 1] == *value {
126 i -= 1;
127 }
128 return i;
129 }
130
131 if *k < *value {
132 if pos + 1 < len && data[pos + 1] >= *value {
133 return pos + 1;
134 }
135 } else if pos > 0 && data[pos - 1] < *value {
136 return pos;
137 }
138
139 let lo = approx.lo.saturating_sub(self.epsilon);
142 let hi = (approx.hi + self.epsilon).min(len);
143 let slice = &data[lo..hi];
144 lo + slice.partition_point(|x| x < value)
145 }
146
147 #[inline]
149 pub fn upper_bound(&self, data: &[T], value: &T) -> usize
150 where
151 T: Ord,
152 {
153 let idx = self.lower_bound(data, value);
154 let mut i = idx;
155 while i < data.len() && data[i] == *value {
156 i += 1;
157 }
158 i
159 }
160
161 #[inline]
163 pub fn contains(&self, data: &[T], value: &T) -> bool
164 where
165 T: Ord,
166 {
167 let idx = self.lower_bound(data, value);
168 idx < data.len() && data[idx] == *value
169 }
170
171 #[inline]
172 pub fn len(&self) -> usize {
173 self.len
174 }
175
176 #[inline]
177 pub fn is_empty(&self) -> bool {
178 self.len == 0
179 }
180
181 #[inline]
182 pub fn segments_count(&self) -> usize {
183 self.segments.len()
184 }
185
186 #[inline]
187 pub fn epsilon(&self) -> usize {
188 self.epsilon
189 }
190
191 pub fn size_in_bytes(&self) -> usize {
192 core::mem::size_of::<Self>()
193 + self.segments.capacity() * core::mem::size_of::<Segment<T::Key>>()
194 }
195
196 #[inline]
198 pub fn range_indices<R>(&self, data: &[T], range: R) -> (usize, usize)
199 where
200 T: Ord,
201 R: RangeBounds<T>,
202 {
203 range_to_indices(
204 range,
205 data.len(),
206 |v| self.lower_bound(data, v),
207 |v| self.upper_bound(data, v),
208 )
209 }
210
211 #[inline]
213 pub fn range<'a, R>(&self, data: &'a [T], range: R) -> impl DoubleEndedIterator<Item = &'a T>
214 where
215 T: Ord,
216 R: RangeBounds<T>,
217 {
218 let (start, end) = self.range_indices(data, range);
219 data[start..end].iter()
220 }
221}
222
223impl<T: Indexable> crate::index::External<T> for OneLevel<T>
224where
225 T::Key: Ord,
226{
227 #[inline]
228 fn search(&self, value: &T) -> ApproxPos {
229 self.search(value)
230 }
231
232 #[inline]
233 fn lower_bound(&self, data: &[T], value: &T) -> usize
234 where
235 T: Ord,
236 {
237 self.lower_bound(data, value)
238 }
239
240 #[inline]
241 fn upper_bound(&self, data: &[T], value: &T) -> usize
242 where
243 T: Ord,
244 {
245 self.upper_bound(data, value)
246 }
247
248 #[inline]
249 fn contains(&self, data: &[T], value: &T) -> bool
250 where
251 T: Ord,
252 {
253 self.contains(data, value)
254 }
255
256 #[inline]
257 fn len(&self) -> usize {
258 self.len()
259 }
260
261 #[inline]
262 fn segments_count(&self) -> usize {
263 self.segments_count()
264 }
265
266 #[inline]
267 fn epsilon(&self) -> usize {
268 self.epsilon()
269 }
270
271 #[inline]
272 fn size_in_bytes(&self) -> usize {
273 self.size_in_bytes()
274 }
275}
276
277#[cfg(test)]
278mod tests {
279 use super::*;
280 use alloc::vec;
281 use alloc::vec::Vec;
282
283 #[test]
284 fn test_one_level_basic() {
285 let keys: Vec<u64> = (0..1000).collect();
286 let index = OneLevel::new(&keys, 8).unwrap();
287
288 assert_eq!(index.len(), 1000);
289 assert!(!index.is_empty());
290 }
291
292 #[test]
293 fn test_one_level_search() {
294 let keys: Vec<u64> = (0..1000).collect();
295 let index = OneLevel::new(&keys, 8).unwrap();
296
297 for &key in &[0u64, 100, 500, 999] {
298 let idx = index.lower_bound(&keys, &key);
299 assert_eq!(idx, key as usize);
300 }
301 }
302
303 #[test]
304 fn test_one_level_contains() {
305 let keys: Vec<u64> = (0..100).map(|i| i * 2).collect();
306 let index = OneLevel::new(&keys, 8).unwrap();
307
308 assert!(index.contains(&keys, &0));
309 assert!(index.contains(&keys, &50));
310 assert!(!index.contains(&keys, &1));
311 assert!(!index.contains(&keys, &51));
312 }
313
314 #[test]
315 fn test_one_level_empty_error() {
316 let keys: Vec<u64> = vec![];
317 let result = OneLevel::new(&keys, 8);
318 assert!(matches!(result, Err(Error::EmptyInput)));
319 }
320}