1use epserde::prelude::*;
13
14#[derive(Clone, Copy, Debug)]
16pub struct DecodedOffset {
17 pub absolute_offset: u64,
19 pub relative_offset: u64,
21}
22
23impl DecodedOffset {
24 pub fn new(absolute_offset: u64, relative_offset: u64) -> Self {
26 Self {
27 absolute_offset,
28 relative_offset,
29 }
30 }
31}
32
33#[derive(Clone, Debug, Epserde)]
37pub struct OffsetsVector {
38 offsets: Vec<u64>,
40}
41
42impl OffsetsVector {
43 pub fn new() -> Self {
45 Self {
46 offsets: vec![0], }
48 }
49
50 #[inline]
52 pub fn push(&mut self, offset: u64) {
53 self.offsets.push(offset);
54 }
55
56 #[inline]
58 pub fn access(&self, i: usize) -> u64 {
59 assert!(i < self.offsets.len(), "Offset index {} out of bounds", i);
60 self.offsets[i]
61 }
62
63 #[inline]
65 pub fn decode(&self, absolute_offset: u64) -> DecodedOffset {
66 DecodedOffset::new(absolute_offset, absolute_offset)
67 }
68
69 #[inline]
71 pub fn len(&self) -> usize {
72 self.offsets.len()
73 }
74
75 #[inline]
77 pub fn is_empty(&self) -> bool {
78 self.offsets.is_empty()
79 }
80
81 #[inline]
83 pub fn num_bytes(&self) -> u64 {
84 (self.offsets.len() * 8) as u64
85 }
86
87 #[inline]
89 pub fn num_bits(&self) -> u64 {
90 (self.offsets.len() * 64) as u64
91 }
92
93 #[inline]
97 pub fn locate(&self, pos: u64) -> Option<(u64, u64)> {
98 let n = self.offsets.len();
99 if n < 2 {
100 return None;
101 }
102
103 let idx = self.offsets.partition_point(|&x| x <= pos);
106
107 if idx == 0 {
109 return None;
110 }
111 let string_id = idx - 1;
112
113 if string_id + 1 < n {
115 Some((string_id as u64, self.offsets[string_id]))
116 } else {
117 None
118 }
119 }
120
121 #[inline]
126 pub fn locate_branchless(&self, pos: u64) -> Option<(u64, u64)> {
127 let n = self.offsets.len();
128 if n < 2 {
129 return None;
130 }
131
132 let data = self.offsets.as_slice();
133
134 let mut lo: usize = 0;
136 let mut size = n;
137 while size > 1 {
138 let half = size / 2;
139 let mid = lo + half;
140 lo = if unsafe { *data.get_unchecked(mid) } <= pos { mid } else { lo };
143 size -= half;
144 }
145
146 if unsafe { *data.get_unchecked(lo) } > pos {
148 return None;
149 }
150 if lo + 1 < n {
151 Some((lo as u64, unsafe { *data.get_unchecked(lo) }))
152 } else {
153 None
154 }
155 }
156}
157
158impl Default for OffsetsVector {
159 fn default() -> Self {
160 Self::new()
161 }
162}
163
164#[cfg(test)]
165mod tests {
166 use super::*;
167
168 #[test]
169 fn test_offsets_vector_creation() {
170 let offsets = OffsetsVector::new();
171 assert_eq!(offsets.len(), 1);
172 assert_eq!(offsets.access(0), 0);
173 }
174
175 #[test]
176 fn test_offsets_vector_push() {
177 let mut offsets = OffsetsVector::new();
178 offsets.push(100);
179 offsets.push(200);
180 offsets.push(300);
181
182 assert_eq!(offsets.len(), 4);
183 assert_eq!(offsets.access(0), 0);
184 assert_eq!(offsets.access(1), 100);
185 assert_eq!(offsets.access(2), 200);
186 assert_eq!(offsets.access(3), 300);
187 }
188
189 #[test]
190 fn test_offsets_vector_decode() {
191 let offsets = OffsetsVector::new();
192 let decoded = offsets.decode(50);
193 assert_eq!(decoded.absolute_offset, 50);
194 assert_eq!(decoded.relative_offset, 50);
195 }
196
197 #[test]
198 fn test_decoded_offset_creation() {
199 let decoded = DecodedOffset::new(1000, 500);
200 assert_eq!(decoded.absolute_offset, 1000);
201 assert_eq!(decoded.relative_offset, 500);
202 }
203}
204
205use sux::dict::elias_fano::{EliasFanoBuilder, EfSeqDict};
210use sux::traits::{IndexedSeq, Succ};
211use sux::traits::iter::BidiIterator;
212use mem_dbg::{MemSize, SizeFlags};
213
214pub struct EliasFanoOffsets {
228 ef: EfSeqDict,
230}
231
232impl EliasFanoOffsets {
233 pub fn from_vec(offsets: &[u64]) -> Self {
235 let n = offsets.len();
236 let u = if n > 0 { offsets[n - 1] as usize + 1 } else { 1 };
237 let mut builder = EliasFanoBuilder::new(n, u);
238 for &v in offsets {
239 builder.push(v as usize);
240 }
241 Self { ef: builder.build_with_seq_and_dict() }
242 }
243
244 pub fn from_offsets_vector(ov: OffsetsVector) -> Self {
246 Self::from_vec(&ov.offsets)
247 }
248
249 #[inline]
251 pub fn access(&self, i: usize) -> u64 {
252 unsafe { self.ef.get_unchecked(i) as u64 }
254 }
255
256 #[inline]
263 pub fn locate_with_end(&self, pos: u64) -> Option<(u64, u64, u64)> {
264 let n = self.ef.len();
265 if n < 2 {
266 return None;
267 }
268
269 let (idx, mut iter) = self.ef.iter_bidi_from_succ(pos as usize)?;
272
273 let val = iter.next()?;
275
276 if val == pos as usize {
277 if idx + 1 < n {
280 let end = iter.next()? as u64;
281 Some((idx as u64, pos, end))
282 } else {
283 None
284 }
285 } else {
286 debug_assert!(idx > 0);
291 iter.prev(); let begin = iter.prev()? as u64; Some(((idx - 1) as u64, begin, val as u64))
294 }
295 }
296
297 #[inline]
304 pub fn locate(&self, pos: u64) -> Option<(u64, u64)> {
305 let n = self.ef.len();
306 if n < 2 {
307 return None;
308 }
309
310 let (idx, mut iter) = self.ef.iter_bidi_from_succ(pos as usize)?;
311 let val = iter.next()?;
312
313 if val == pos as usize {
314 if idx + 1 < n {
316 Some((idx as u64, pos))
317 } else {
318 None
319 }
320 } else {
321 debug_assert!(idx > 0);
323 iter.prev(); let string_begin = iter.prev()? as u64; Some(((idx - 1) as u64, string_begin))
326 }
327 }
328
329 #[inline]
331 pub fn len(&self) -> usize {
332 self.ef.len()
333 }
334
335 #[inline]
337 pub fn is_empty(&self) -> bool {
338 self.ef.len() == 0
339 }
340
341 #[inline]
343 pub fn num_bytes(&self) -> u64 {
344 self.ef.mem_size(SizeFlags::default()) as u64
345 }
346
347 #[inline]
349 pub fn num_bits(&self) -> u64 {
350 self.num_bytes() * 8
351 }
352
353 pub fn write_to<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
355 unsafe {
356 self.ef.serialize(writer)
357 .map_err(std::io::Error::other)?;
358 }
359 Ok(())
360 }
361
362 pub fn read_from<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
364 let ef = unsafe {
365 EfSeqDict::deserialize_full(reader)
366 .map_err(std::io::Error::other)?
367 };
368 Ok(Self { ef })
369 }
370}
371
372impl std::fmt::Debug for EliasFanoOffsets {
373 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
374 f.debug_struct("EliasFanoOffsets")
375 .field("len", &self.ef.len())
376 .finish()
377 }
378}
379
380#[cfg(test)]
381mod ef_tests {
382 use super::*;
383
384 #[test]
385 fn test_ef_from_vec() {
386 let offsets = vec![0, 100, 200, 300, 400];
387 let ef = EliasFanoOffsets::from_vec(&offsets);
388 assert_eq!(ef.len(), 5);
389 assert_eq!(ef.access(0), 0);
390 assert_eq!(ef.access(1), 100);
391 assert_eq!(ef.access(2), 200);
392 assert_eq!(ef.access(3), 300);
393 assert_eq!(ef.access(4), 400);
394 }
395
396 #[test]
397 fn test_ef_locate() {
398 let offsets = vec![0, 100, 200, 300, 400];
399 let ef = EliasFanoOffsets::from_vec(&offsets);
400
401 assert_eq!(ef.locate(50), Some((0, 0)));
403 assert_eq!(ef.locate(100), Some((1, 100)));
405 assert_eq!(ef.locate(199), Some((1, 100)));
407 assert_eq!(ef.locate(300), Some((3, 300)));
409 assert_eq!(ef.locate(399), Some((3, 300)));
411 assert_eq!(ef.locate(400), None);
413 }
414
415 #[test]
416 fn test_ef_locate_with_end() {
417 let offsets = vec![0, 100, 200, 300, 400];
418 let ef = EliasFanoOffsets::from_vec(&offsets);
419
420 assert_eq!(ef.locate_with_end(50), Some((0, 0, 100)));
422 assert_eq!(ef.locate_with_end(100), Some((1, 100, 200)));
424 assert_eq!(ef.locate_with_end(199), Some((1, 100, 200)));
426 assert_eq!(ef.locate_with_end(300), Some((3, 300, 400)));
428 assert_eq!(ef.locate_with_end(399), Some((3, 300, 400)));
430 assert_eq!(ef.locate_with_end(400), None);
432 }
433
434 #[test]
435 fn test_ef_serialization_roundtrip() {
436 let offsets = vec![0, 100, 200, 300, 400];
437 let ef = EliasFanoOffsets::from_vec(&offsets);
438
439 let mut buf = Vec::new();
440 ef.write_to(&mut buf).unwrap();
441
442 let ef2 = EliasFanoOffsets::read_from(&mut &buf[..]).unwrap();
443 assert_eq!(ef2.len(), 5);
444 for i in 0..5 {
445 assert_eq!(ef.access(i), ef2.access(i));
446 }
447 assert_eq!(ef2.locate(150), Some((1, 100)));
448 assert_eq!(ef2.locate_with_end(150), Some((1, 100, 200)));
449 }
450
451 #[test]
454 fn test_ef_locate_with_end_stress() {
455 let mut offsets = vec![0u64];
457 let gaps = [3, 7, 1, 15, 2, 100, 5, 31, 8, 63, 4, 127, 6, 255, 10, 50,
458 1, 1, 1, 33, 200, 9, 17, 3, 11, 500, 2, 7, 13, 41];
459 for &g in gaps.iter() {
460 offsets.push(offsets.last().unwrap() + g);
461 }
462 let ef = EliasFanoOffsets::from_vec(&offsets);
463
464 for (i, &v) in offsets.iter().enumerate() {
466 assert_eq!(ef.access(i), v, "access({i}) mismatch");
467 }
468
469 let universe = *offsets.last().unwrap();
471 for pos in 0..=universe {
472 let expected = {
473 let mut found = None;
475 for i in 0..offsets.len() - 1 {
476 if offsets[i] <= pos && pos < offsets[i + 1] {
477 found = Some((i as u64, offsets[i], offsets[i + 1]));
478 break;
479 }
480 }
481 found
482 };
483 let got = ef.locate_with_end(pos);
484 assert_eq!(got, expected, "locate_with_end({pos}) mismatch");
485 }
486
487 assert_eq!(ef.locate_with_end(universe), None);
489 assert_eq!(ef.locate_with_end(universe + 1), None);
490 }
491
492 #[test]
494 fn test_ef_locate_large_universe() {
495 let offsets: Vec<u64> = vec![0, 1000, 5000, 5001, 5002, 10000, 100000, 100001, 500000];
496 let ef = EliasFanoOffsets::from_vec(&offsets);
497
498 let test_positions: Vec<u64> = {
500 let mut v = Vec::new();
501 for &off in &offsets {
502 if off > 0 { v.push(off - 1); }
503 v.push(off);
504 v.push(off + 1);
505 }
506 v.extend_from_slice(&[500, 3000, 5000, 7500, 50000, 200000, 400000]);
508 v.sort();
509 v.dedup();
510 v
511 };
512
513 for pos in test_positions {
514 let expected = {
515 let mut found = None;
516 for i in 0..offsets.len() - 1 {
517 if offsets[i] <= pos && pos < offsets[i + 1] {
518 found = Some((i as u64, offsets[i], offsets[i + 1]));
519 break;
520 }
521 }
522 found
523 };
524 let got = ef.locate_with_end(pos);
525 assert_eq!(got, expected, "locate_with_end({pos}) mismatch");
526 }
527 }
528}