cseq/
elias_fano.rs

1//! Elias-Fano representation of a non-decreasing sequence of integers.
2
3use std::{io, iter::FusedIterator, ops::{Deref, DerefMut}};
4
5use binout::{AsIs, Serializer};
6use bitm::{ceiling_div, n_lowest_bits, RankSelect101111, BitAccess, BitVec, CombinedSampling, ConstCombinedSamplingDensity, Rank, Select, Select0, Select0ForRank101111, SelectForRank101111};
7use dyn_size_of::GetSize;
8
9/// Builds [`Sequence`] of values added by push methods.
10/// After adding values in non-decreasing order by [`Self::push`] method,
11/// [`Self::finish`] can be called to construct [`Sequence`].
12pub struct Builder<BV = Box<[u64]>> {
13    hi: BV, // most significant bits of each item, unary coded
14    lo: Box<[u64]>, // least significant bits of each item, vector of `bits_per_lo_entry` bit items
15    bits_per_lo: u8,  // bit size of each entry in lo
16    current_len: usize,  // number of already pushed items
17    target_len: usize,   // total number of items to push
18    last_added: u64, // value of recently pushed item
19    universe: u64   // all pushed items must be in range [`0`, `universe`)
20}
21
22impl<BV> Builder<BV> {
23    /// Returns declared *universe*. All pushed items must be in range [0, *universe*).
24    #[inline] pub fn universe(&self) -> u64 { self.universe }
25
26    /// Returns number of already pushed items.
27    #[inline] pub fn current_len(&self) -> usize { self.current_len }
28
29    /// Returns total number of items to push.
30    #[inline] pub fn target_len(&self) -> usize { self.target_len }
31
32    /// Returns value of recently pushed item (if any) or 0.
33    #[inline] pub fn last_pushed(&self) -> u64 { self.last_added }
34}
35
36impl<BV: BitVec> Builder<BV> {
37    /// Constructs [`Builder`] to build [`Sequence`] with custom bit vector type and
38    /// `final_len` values in range [`0`, `universe`).
39    /// After adding values in non-decreasing order by [`Self::push`] method,
40    /// [`Self::finish`] can be called to construct [`Sequence`].
41    pub fn new_b(final_len: usize, universe: u64) -> Self {
42        if final_len == 0 || universe == 0 {
43            return Self { hi: BV::with_64bit_segments(0, 0), lo: Default::default(), bits_per_lo: 0, current_len: 0, target_len: 0, last_added: 0, universe };
44        }
45        let bits_per_lo = (universe / final_len as u64).checked_ilog2().unwrap_or(0) as u8;
46        Self {
47            // adding the last (i.e. (final_len-1)-th) item with value universe-1 sets bit (final_len-1) + ((universe-1) >> bits_per_lo)
48            hi: BV::with_zeroed_bits(final_len + ((universe-1) >> bits_per_lo) as usize),
49            lo: Box::with_zeroed_bits(1.max(final_len * bits_per_lo as usize)),
50            bits_per_lo,
51            current_len: 0,
52            target_len: final_len,
53            last_added: 0,
54            universe,
55        }
56    }
57}
58
59impl Builder {
60    /// Constructs [`Builder`] to build [`Sequence`] with `final_len` values in range [`0`, `universe`).
61    /// After adding values in non-decreasing order by [`Self::push`] method,
62    /// [`Self::finish`] can be called to construct [`Sequence`].
63    #[inline] pub fn new(final_len: usize, universe: u64) -> Self {
64        Self::new_b(final_len, universe)
65    }
66}
67
68impl<BV: DerefMut<Target = [u64]>> Builder<BV> {
69    /// A version of [`Self::push`] without any checks and panic.
70    pub unsafe fn push_unchecked(&mut self, value: u64) {
71        self.hi.set_bit((value>>self.bits_per_lo) as usize + self.current_len);
72        self.lo.init_successive_fragment(&mut self.current_len, value & n_lowest_bits(self.bits_per_lo), self.bits_per_lo);
73        self.last_added = value;
74    }
75
76    /// Pushes a value that is `diff` greater than the previous one, or from 0 if pushing the first value.
77    /// A version of [`Self::push_diff`] without any checks and panic.
78    pub unsafe fn push_diff_unchecked(&mut self, diff: u64) {
79        self.push_unchecked(self.last_added+diff)
80    }
81
82    /// Pushes a `value`. It must be greater than or equal to previous one, and less than universe.
83    /// Otherwise, or in case of an attempt to push too many items, panics.
84    pub fn push(&mut self, value: u64) {
85        assert!(value < self.universe, "Elias-Fano Builder: cannot push value {value} outside the universe (<{})", self.universe);
86        assert!(self.current_len < self.target_len, "Elias-Fano Builder: push exceeds the declared length of {} values", self.target_len);
87        assert!(self.last_added <= value, "Elias-Fano Builder: values must be pushed in non-decreasing order, but received {value} after {}", self.last_added);
88        unsafe { self.push_unchecked(value) }
89    }
90
91    /// Pushes a value that is `diff` greater than the previous one, or from 0 if pushing the first value.
92    /// Panics if the pushed item is not less than universe or all declared items has been already pushed.
93    pub fn push_diff(&mut self, diff: u64) {
94        self.push(self.last_added.saturating_add(diff))
95    }
96
97    /// Pushes all `values`. Calls [`Self::push`] for all `values` items.
98    pub fn push_all<I: IntoIterator<Item = u64>>(&mut self, values: I) {
99        for value in values { self.push(value) }
100    }
101
102    /// Calls [`Self::push_diff`] for all `diffs` items.
103    pub fn push_diffs<I: IntoIterator<Item = u64>>(&mut self, diffs: I) {
104        for diff in diffs { self.push_diff(diff) }
105    }
106}
107
108impl<BV: Deref<Target = [u64]>> Builder<BV> {
109    /// Finishes building and returns [`Sequence`] containing the pushed items and custom select strategy.
110    /// The resulted [`Sequence`] is invalid if not all declared items have been pushed.
111    pub fn finish_unchecked_s<S: SelectForRank101111, S0: Select0ForRank101111>(self) -> Sequence<S, S0, BV> {
112        Sequence::<S, S0, BV> {
113            hi: self.hi.into(),
114            lo: self.lo,
115            bits_per_lo: self.bits_per_lo,
116            len: self.current_len,
117        }
118    }
119
120    /// Finishes building and returns [`Sequence`] containing the pushed items and custom select strategy.
121    /// Panics if not all declared items have been pushed. 
122    pub fn finish_s<S: SelectForRank101111, S0: Select0ForRank101111>(self) -> Sequence<S, S0, BV> {
123        assert_eq!(self.current_len, self.target_len, "Cannot finish building Elias-Fano Sequence as the current length ({}) differs from the target ({})", self.current_len, self.target_len);
124        self.finish_unchecked_s::<S, S0>()
125    }
126
127    /// Finishes building and returns [`Sequence`] containing the pushed items.
128    /// The resulted [`Sequence`] is invalid if not all declared items have been pushed.
129    #[inline] pub fn finish_unchecked(self) -> Sequence<DefaultSelectStrategy, DefaultSelectStrategy, BV> {
130        self.finish_unchecked_s()
131    }
132
133    /// Finishes building and returns [`Sequence`] containing the pushed items.
134    /// Panics if not all declared items have been pushed. 
135    #[inline] pub fn finish(self) -> Sequence<DefaultSelectStrategy, DefaultSelectStrategy, BV> {
136        self.finish_s()
137    }
138}
139
140/// Default select strategy for Elias-Fano [`Sequence`].
141pub type DefaultSelectStrategy = CombinedSampling<ConstCombinedSamplingDensity>;
142
143/// Elias-Fano representation of a non-decreasing sequence of integers.
144/// 
145/// By default [`bitm::CombinedSampling`] is used used as both a select and select0 strategy
146/// for internal bit vector (see [`bitm::RankSelect101111`]).
147/// However, either of these two strategies can be changed to [`bitm::BinaryRankSearch`]
148/// to save a bit of space (maximum about 0.39% per strategy) at the cost of slower:
149/// - (for select) getting an item at the given index,
150/// - (for select0) finding the index of an item with a value (exactly or at least) equal to the given.
151/// 
152/// The structure was invented by Peter Elias and, independently, Robert Fano:
153/// - Peter Elias "Efficient storage and retrieval by content and address of static files",
154///   J. ACM 21 (2) (1974) 246–260. doi:10.1145/321812.321820.
155/// - Robert Mario Fano "On the number of bits required to implement an associative memory",
156///   Memorandum 61, Computer Structures Group, Project MAC, MIT, Cambridge, Mass., nd (1971) 27.
157/// 
158/// Our implementation draws ideas from:
159/// - Sebastiano Vigna "Quasi-succinct indices", 2013,
160///   In Proceedings of the sixth ACM international conference on Web search and data mining (WSDM '13),
161///   Association for Computing Machinery, New York, NY, USA, 83–92. <https://doi.org/10.1145/2433396.2433409>
162/// - Daisuke Okanohara, Kunihiko Sadakane, "Practical entropy-compressed rank/select dictionary",
163///   Proceedings of the Meeting on Algorithm Engineering & Expermiments, January 2007, pages 60–70,
164///   <https://dl.acm.org/doi/10.5555/2791188.2791194> (Section 6 "SDarrays")
165pub struct Sequence<S = DefaultSelectStrategy, S0 = DefaultSelectStrategy, BV=Box<[u64]>> {
166    hi: RankSelect101111<S, S0, BV>,   // most significant bits of each item, unary coded
167    lo: Box<[u64]>, // least significant bits of each item, vector of `bits_per_lo` bit items
168    bits_per_lo: u8, // bit size of each entry in lo
169    len: usize  // number of items
170}
171
172impl<S, S0, BV> Sequence<S, S0, BV> {
173    /// Returns number of stored values.
174    #[inline] pub fn len(&self) -> usize { self.len }
175
176    /// Returns whether the sequence is empty.
177    #[inline] pub fn is_empty(&self) -> bool { self.len == 0 }
178
179    #[inline] pub fn bits_per_lo(&self) -> u8 { self.bits_per_lo }
180}
181
182impl<S, S0, BV: Deref<Target = [u64]>> Sequence<S, S0, BV> {
183    /// Advance `position` by 1 forward. The result is undefined if `position` is invalid.
184    #[inline] unsafe fn advance_position_unchecked(&self, position: &mut Position) {
185        position.lo += 1;
186        position.hi = if position.lo != self.len {
187            self.hi.content.find_bit_one_unchecked(position.hi+1)
188        } else {
189            self.hi.content.len() * 64
190        }
191    }
192
193    /// Advance `position` by 1 backward. The result is undefined if `position` points to index 0.
194    #[inline] unsafe fn advance_position_back_unchecked(&self, position: &mut Position) {
195        position.lo -= 1;
196        position.hi = self.hi.content.rfind_bit_one_unchecked(position.hi-1);
197    }
198
199    /// Returns value at `position` and next advance `position`. The result is undefined if `position` is invalid.
200    #[inline] unsafe fn position_next_unchecked(&self, position: &mut Position) -> u64 {
201        let result = self.value_at_position_unchecked(*position);
202        self.advance_position_unchecked(position);
203        result
204    }
205
206    /// If the `position` is valid, returns its value and next advances it. Otherwise returns [`None`].
207    #[inline] fn position_next(&self, position: &mut Position) -> Option<u64> {
208        (position.lo != self.len).then(|| unsafe { self.position_next_unchecked(position) })
209    }
210
211    #[inline] unsafe fn value_at_position_unchecked(&self, position: Position) -> u64 {
212        position.hi_bits() << self.bits_per_lo | self.lo.get_fragment(position.lo, self.bits_per_lo)
213    }
214
215    /// Returns difference between the value of given and the previous positions.
216    /// The result is undefined if the `position` is invalid.
217    #[inline] unsafe fn diff_at_position_unchecked(&self, mut position: Position) -> u64 {
218        let current = self.value_at_position_unchecked(position);
219        if position.lo == 0 { return current; }
220        self.advance_position_back_unchecked(&mut position);
221        current - self.value_at_position_unchecked(position)
222    }
223
224    /// Returns difference between the value of given and the previous positions.
225    /// Returns [`None`] if the `position` is invalid.
226    #[inline] fn diff_at_position(&self, position: Position) -> Option<u64> {
227        (position.lo != self.len).then(|| unsafe { self.diff_at_position_unchecked(position) })
228    }
229
230    /// Returns value at given `position` or [`None`] if the `position` is invalid.
231    #[inline] fn value_at_position(&self, position: Position) -> Option<u64> {
232        (position.lo != self.len).then(|| unsafe { self.value_at_position_unchecked(position) })
233    }
234
235    /// Returns position that points to the first item of `self`.
236    #[inline] fn begin_position(&self) -> Position {
237        Position { hi: self.hi.content.trailing_zero_bits(), lo: 0 }
238    }
239
240    /// Returns position that points past the end.
241    #[inline] fn end_position(&self) -> Position {
242        Position { hi: self.hi.content.len() * 64, lo: self.len }
243    }
244
245    /// Converts `position` to [`Cursor`].
246    #[inline] fn cursor(&'_ self, position: Position) -> Cursor<'_, S, S0, BV> {
247        Cursor { sequence: &self, position }
248    }
249
250    /// Returns iterator over `self` values.
251    #[inline] pub fn iter(&'_ self) -> Iterator<'_, S, S0, BV> {
252        Iterator { sequence: self, begin: self.begin_position(), end: self.end_position() } 
253    }
254
255    /// Returns an iterator that gives the value of the first item followed by
256    /// the differences between the values of subsequent items.
257    #[inline] pub fn diffs(&'_ self) -> DiffIterator<'_, S, S0, BV> {
258        DiffIterator { sequence: self, position: self.begin_position(), prev_value: 0 } 
259    }
260
261    /// Returns cursor that points to the first item of `self`.
262    #[inline] pub fn begin(&'_ self) -> Cursor<'_, S, S0, BV> {
263        self.cursor(self.begin_position())
264    }
265    
266    /// Returns cursor that points past the end.
267    #[inline] pub fn end(&'_ self) -> Cursor<'_, S, S0, BV> {
268        self.cursor(self.end_position())
269    }
270
271    /// Returns number of bytes which `write` will write.
272    pub fn write_bytes(&self) -> usize {
273        AsIs::size(self.bits_per_lo) +
274        AsIs::array_size(&self.hi.content) +
275        if self.bits_per_lo != 0 && self.len != 0 { AsIs::array_content_size(&self.lo) } else { 0 }
276    }
277
278    /// Writes `self` to the `output`.
279    pub fn write(&self, output: &mut dyn io::Write) -> io::Result<()>
280    {
281        AsIs::write(output, self.bits_per_lo)?;
282        AsIs::write_array(output, &self.hi.content)?;
283        if self.bits_per_lo != 0 && self.len != 0 { AsIs::write_all(output, self.lo.iter())? };
284        Ok(())
285    }
286}
287
288impl Sequence {
289    /// Constructs [`Sequence`] filled with elements from the `items` slice, which must be in non-decreasing order.
290    #[inline] pub fn with_items_from_slice<I: Into<u64> + Clone>(items: &[I]) -> Self {
291        Self::with_items_from_slice_s(items)
292    }
293
294    /// Reads `self` from the `input`.
295    pub fn read(input: &mut dyn io::Read) -> io::Result<Self> {
296        Self::read_s(input)
297    }
298}
299
300impl<S: SelectForRank101111, S0: Select0ForRank101111, BV: Deref<Target = [u64]>+FromIterator<u64>> Sequence<S, S0, BV> {
301
302    /// Reads `self` from the `input`.
303    /// 
304    /// Custom select strategies do not have to be the same as the ones used by the written sequence.
305    pub fn read_s(input: &mut dyn io::Read) -> io::Result<Self> {
306        let bits_per_lo: u8 = AsIs::read(input)?;
307        let hi: BV = <AsIs as Serializer<u64>>::read_array_iter(input)?.collect::<io::Result<BV>>()?;
308        let (hi, len) = RankSelect101111::build(hi);
309        let lo = if bits_per_lo != 0 && len != 0 {
310            AsIs::read_n(input, ceiling_div(len * bits_per_lo as usize, 64))?
311        } else {
312            (if len == 0 { vec![] } else { vec![0] }).into_boxed_slice()
313        };
314        Ok(Self { hi, lo, bits_per_lo, len })
315    }
316}
317
318impl<S: SelectForRank101111, S0: Select0ForRank101111, BV: BitVec+DerefMut<Target = [u64]>> Sequence<S, S0, BV> {
319    /// Constructs [`Sequence`] with custom select strategy and
320    /// filled with elements from the `items` slice, which must be in non-decreasing order.
321    pub fn with_items_from_slice_s<I: Into<u64> + Clone>(items: &[I]) -> Self {
322        let mut b = Builder::<BV>::new_b(items.len(), items.last().map_or(0, |v| v.clone().into()+1));
323        b.push_all(items.iter().map(|v| v.clone().into()));
324        b.finish_unchecked_s()
325    }
326}
327
328impl<S: SelectForRank101111, S0, BV: Deref<Target = [u64]>> Sequence<S, S0, BV> {
329    /// Returns value at given `index`. The result is undefined if `index` is out of bounds.
330    #[inline] pub unsafe fn get_unchecked(&self, index: usize) -> u64 {
331        (((unsafe{self.hi.select_unchecked(index)} - index) as u64) << self.bits_per_lo) |
332            self.lo.get_fragment(index, self.bits_per_lo)
333    }
334
335    /// Returns value at given `index` or [`None`] if `index` is out of bounds.
336    #[inline] pub fn get(&self, index: usize) -> Option<u64> {
337        (index < self.len).then(|| unsafe{self.get_unchecked(index)} )
338    }
339
340    /// Returns value at given `index` or panics if `index` is out of bounds.
341    pub fn get_or_panic(&self, index: usize) -> u64 {
342        self.get(index).expect("attempt to retrieve value for an index out of bounds of the Elias-Fano Sequence")
343    }
344
345    /// Returns difference between the value at given `index` and the previous value.
346    /// If `index` is 0, returns value at index 0,just like [`Self::get_unchecked`].
347    /// The result is undefined if `index` is out of bounds.
348    #[inline] pub unsafe fn diff_unchecked(&self, index: usize) -> u64 {
349        self.diff_at_position_unchecked(self.position_at_unchecked(index))
350    }
351
352    /// Returns difference between the value at given `index` and the previous value.
353    /// If `index` is 0, returns value at index 0, just like [`Self::get`].
354    /// Returns [`None`] if `index` is out of bounds.
355    #[inline] pub fn diff(&self, index: usize) -> Option<u64> {
356        (index < self.len).then(|| unsafe{self.diff_unchecked(index)})
357    }
358
359    /// Returns difference between the value at given `index` and the previous value.
360    /// If `index` is 0, returns value at index 0, just like [`Self::get_or_panic`].
361    /// Panics if `index` is out of bounds.
362    #[inline] pub fn diff_or_panic(&self, index: usize) -> u64 {
363        self.diff(index).expect("attempt to retrieve diff for an index out of bounds of the Elias-Fano Sequence")
364    }
365
366    #[inline] unsafe fn position_at_unchecked(&self, index: usize) -> Position {
367        Position { hi: self.hi.select_unchecked(index), lo: index }
368    }
369
370    /*#[inline] fn position_at(&self, index: usize) -> Option<Position> {
371        (index < self.len).then(|| unsafe { self.position_at_unchecked(index) })
372    }*/
373
374    /// Returns valid cursor that points to given `index` of `self`.
375    /// Result is undefined if `index` is out of bounds.
376    #[inline] pub unsafe fn cursor_at_unchecked(&'_ self, index: usize) -> Cursor<'_, S, S0, BV> {
377        self.cursor(self.position_at_unchecked(index))
378    }
379
380    /// Returns valid cursor that points to given `index` of `self`,
381    /// or [`None`] if `index` is out of bounds.
382    #[inline] pub unsafe fn cursor_at(&'_ self, index: usize) -> Option<Cursor<'_, S, S0, BV>> {
383        (index < self.len).then(|| unsafe { self.cursor_at_unchecked(index) })
384    }
385}
386
387impl<S, S0: Select0ForRank101111, BV: Deref<Target = [u64]>> Sequence<S, S0, BV> {
388    /// Returns the uncorrected position of first `self` item with value greater than or equal to given `value`.
389    /// The `hi` of result may need correction (moving forward to first 1 bit) if it is not an index of 1 bit.
390    /// `lo` is already correct.
391    fn geq_position_uncorrected(&self, value: u64) -> Position {
392        let value_hi = (value >> self.bits_per_lo) as usize;
393        let mut hi_index = self.hi.try_select0(value_hi).unwrap_or_else(|| self.hi.content.len() * 64);  // index of 0 just after our ones
394        // TODO do we always have such 0? maybe it is better to select0(value_hi-1) and next scan forward?
395        let mut lo_index = hi_index - value_hi;
396
397        let value_lo = value as u64 & n_lowest_bits(self.bits_per_lo);
398        // skiping values that has the same most significant bits but greater or equal lower bits, stop at value with lower less significant bits:
399        while lo_index > 0 && self.hi.content.get_bit(hi_index - 1) &&
400             value_lo <= self.lo.get_fragment(lo_index-1, self.bits_per_lo)
401        {
402            lo_index -= 1;
403            hi_index -= 1;
404        }
405        Position { hi: hi_index, lo: lo_index }
406    }
407
408    /// Returns the position of first `self` item with value greater than or equal to given `value`.
409    fn geq_position(&self, value: u64) -> Position {
410        let mut result = self.geq_position_uncorrected(value);
411        result.hi = self.hi.content.find_bit_one(result.hi).unwrap_or_else(|| self.hi.content.len() * 64);
412        result
413    }
414
415    fn position_of(&self, value: u64) -> Option<Position> {
416        let geq_position = self.geq_position(value);
417        self.value_at_position(geq_position).and_then(|geq_value| (geq_value==value).then_some(geq_position))
418    }
419
420    /// Returns the cursor pointed to the first `self` item with value greater than or equal to given `value`.
421    #[inline] pub fn geq_cursor(&'_ self, value: u64) -> Cursor<'_, S, S0, BV> {
422        self.cursor(self.geq_position(value))
423    }
424
425    /// Returns the index of the first `self` item with value greater than or equal to given `value`.
426    #[inline] pub fn geq_index(&self, value: u64) -> usize {
427        self.geq_position_uncorrected(value).lo
428    }
429
430    /// Returns the cursor pointing to the first occurrence of `value` or [`None`] if `self` does not contain `value`.
431    #[inline] pub fn cursor_of(&'_ self, value: u64) -> Option<Cursor<'_, S, S0, BV>> {
432        self.position_of(value).map(|position| self.cursor(position))
433    }
434
435    /// Returns the index of the first occurrence of `value` or [`None`] if `self` does not contain `value`.
436    #[inline] pub fn index_of(&self, value: u64) -> Option<usize> {
437        self.position_of(value).map(|p| p.lo)
438    }
439}
440
441impl<S: SelectForRank101111, S0, BV: Deref<Target = [u64]>> Select for Sequence<S, S0, BV> {
442    #[inline] unsafe fn select_unchecked(&self, rank: usize) -> usize {
443        self.get_unchecked(rank) as usize
444    }
445
446    #[inline] fn try_select(&self, rank: usize) -> Option<usize> {
447        self.get(rank).map(|v| v as usize)
448    }
449}
450
451impl<S, S0: Select0ForRank101111, BV: Deref<Target = [u64]>> Rank for Sequence<S, S0, BV> {
452    /// Returns the number of `self` items with values less than given `value`.
453    #[inline] unsafe fn rank_unchecked(&self, value: usize) -> usize {
454        self.geq_index(value as u64)
455    }
456    
457    /// Returns the number of `self` items with values less than given `value`.
458    #[inline] fn try_rank(&self, value: usize) -> Option<usize> {
459        Some(self.geq_index(value as u64))
460    }
461}
462
463impl<S, S0, BV> GetSize for Sequence<S, S0, BV> where RankSelect101111<S, S0, BV>: GetSize {
464    fn size_bytes_dyn(&self) -> usize { self.lo.size_bytes_dyn() + self.hi.size_bytes_dyn() }
465    const USES_DYN_MEM: bool = true;
466}
467
468impl<'ef, S, S0, BV: Deref<Target = [u64]>> IntoIterator for &'ef Sequence<S, S0, BV> {
469    type Item = u64;
470    type IntoIter = Iterator<'ef, S, S0, BV>;
471    #[inline] fn into_iter(self) -> Self::IntoIter { self.iter() }
472}
473
474/// Position in Elias-Fano [`Sequence`].
475/// Used internally by [`Iterator`] and [`Cursor`].
476#[derive(Clone, Copy)]
477struct Position {
478    hi: usize,
479    lo: usize
480}
481
482impl Position {
483    #[inline(always)] fn hi_bits(&self) -> u64 { (self.hi - self.lo) as u64 }
484}
485
486/// Iterator over [`Sequence`] values, returned by [`Sequence::iter`] .
487pub struct Iterator<'ef, S, S0, BV> {
488    sequence: &'ef Sequence<S, S0, BV>,
489    begin: Position,
490    end: Position
491}
492
493impl<S, S0, BV> Iterator<'_, S, S0, BV> {
494    /// Returns the [`Sequence`] over which `self` iterates.
495    pub fn sequence(&self) -> &Sequence<S, S0, BV> { self.sequence }
496
497    /// Returns index of the value about to return by `next`.
498    pub fn index(&self) -> usize { self.begin.lo }
499
500    /// Returns index of the value returned by last `next_back`.
501    pub fn back_index(&self) -> usize { self.begin.lo }
502}
503
504impl<S, S0, BV: Deref<Target = [u64]>> std::iter::Iterator for Iterator<'_, S, S0, BV> {
505    type Item = u64;
506
507    fn next(&mut self) -> Option<Self::Item> {
508        (self.begin.lo != self.end.lo).then(|| unsafe { self.sequence.position_next_unchecked(&mut self.begin) })
509    }
510}
511
512impl<S, S0, BV: Deref<Target = [u64]>> DoubleEndedIterator for Iterator<'_, S, S0, BV> {
513    fn next_back(&mut self) -> Option<Self::Item> {
514        (self.begin.lo != self.end.lo).then(|| unsafe {
515            self.sequence.advance_position_back_unchecked(&mut self.end);
516            self.sequence.value_at_position_unchecked(self.end)
517        })
518    }
519}
520
521impl<S, S0, BV: Deref<Target = [u64]>> FusedIterator for Iterator<'_, S, S0, BV> {}
522
523/// Iterator that yields the value of the first item followed by the differences
524/// between the values of subsequent items of [`Sequence`].
525pub struct DiffIterator<'ef, S, S0, BV> {
526    sequence: &'ef Sequence<S, S0, BV>,
527    position: Position,
528    prev_value: u64
529}
530
531impl<S, S0, BV> DiffIterator<'_, S, S0, BV> {
532    /// Returns the [`Sequence`] over which `self` iterates.
533    pub fn sequence(&self) -> &Sequence<S, S0, BV> { self.sequence }
534
535    /// Returns index of the value about to return by `next`.
536    pub fn index(&self) -> usize { self.position.lo }
537}
538
539impl<S, S0, BV: Deref<Target = [u64]>> std::iter::Iterator for DiffIterator<'_, S, S0, BV> {
540    type Item = u64;
541
542    fn next(&mut self) -> Option<Self::Item> {
543        let current_value = self.sequence.position_next(&mut self.position)?;
544        let result = current_value - self.prev_value;
545        self.prev_value = current_value;
546        Some(result)
547    }
548}
549
550impl<S, S0, BV: Deref<Target = [u64]>> FusedIterator for DiffIterator<'_, S, S0, BV> {}
551
552/// Points either a position or past the end in Elias-Fano [`Sequence`].
553/// It is a kind of iterator over the [`Sequence`].
554#[derive(Clone, Copy)]
555pub struct Cursor<'ef, S, S0, BV> {
556    sequence: &'ef Sequence<S, S0, BV>,
557    position: Position,
558}
559
560impl<S, S0, BV> Cursor<'_, S, S0, BV> {
561    /// Returns the [`Sequence`] in which `self` points the item.
562    pub fn sequence(&self) -> &Sequence<S, S0, BV> { self.sequence }
563
564    /// Returns whether `self` points is past the end (is invalid).
565    #[inline] pub fn is_end(&self) -> bool { self.position.lo == self.sequence.len }
566
567    /// Returns whether `self` is valid (i.e., not past the end) and thus its value can be obtained.
568    #[inline] pub fn is_valid(&self) -> bool { self.position.lo != self.sequence.len }
569
570    /// Returns [`Sequence`] index pointed by `self`, i.e. converts `self` to index.
571    #[inline] pub fn index(&self) -> usize { self.position.lo }
572}
573
574impl<S, S0, BV: Deref<Target = [u64]>> Cursor<'_, S, S0, BV> {
575    /// Returns value pointed by `self`. Result is undefined if `self` points past the end.
576    #[inline] pub unsafe fn value_unchecked(&self) -> u64 {
577        return self.sequence.value_at_position_unchecked(self.position)
578    }
579
580    /// Returns value pointed by `self` or [`None`] if it points past the end.
581    #[inline] pub fn value(&self) -> Option<u64> {
582        return self.sequence.value_at_position(self.position)
583    }
584
585    /// If possible, advances `self` one position forward and returns `true`. Otherwise returns `false`.
586    #[inline] pub fn advance(&mut self) -> bool {
587        if self.is_end() { return false; }
588        unsafe { self.sequence.advance_position_unchecked(&mut self.position) };
589        true
590    }
591
592    /// If possible, advances `self` one position backward and returns `true`. Otherwise returns `false`.
593    #[inline] pub fn advance_back(&mut self) -> bool {
594        if self.position.lo == 0 { return false; }
595        unsafe { self.sequence.advance_position_back_unchecked(&mut self.position) };
596        true
597    }
598
599    /// Advances `self` one position backward and next returns value pointed by `self`.
600    pub fn next_back(&mut self) -> Option<u64> {
601        (self.position.lo != 0).then(|| unsafe {
602            self.sequence.advance_position_back_unchecked(&mut self.position);
603            self.sequence.value_at_position_unchecked(self.position)
604        })
605    }
606
607    /// Returns difference between the value of `self` and the previous positions.
608    /// The result is undefined if `self` is invalid.
609    #[inline] pub unsafe fn diff_unchecked(&self) -> u64 {
610        self.sequence.diff_at_position_unchecked(self.position)
611    }
612
613    /// Returns difference between the value of `self` and the previous positions,
614    /// or [`None`] if `self` is invalid.
615    #[inline] pub fn diff(&self) -> Option<u64> {
616        self.sequence.diff_at_position(self.position)
617    }
618
619    /// Returns an iterator that gives the the differences between the values of subsequent items,
620    /// starting from `self`.
621    #[inline] pub fn diffs(&self) -> DiffIterator<'_, S, S0, BV> {
622        if self.position.lo == 0 { return self.sequence.diffs(); }
623        let mut prev = self.position;
624        unsafe{self.sequence.advance_position_back_unchecked(&mut prev)};
625        DiffIterator { sequence: self.sequence, position: self.position, prev_value: unsafe{self.sequence.value_at_position_unchecked(prev)} }
626    }
627}
628
629impl<S, S0, BV: Deref<Target = [u64]>> std::iter::Iterator for Cursor<'_, S, S0, BV> {
630    type Item = u64;
631
632    /// Returns value pointed by `self` and advances it one position forward.
633    fn next(&mut self) -> Option<Self::Item> {
634        self.sequence.position_next(&mut self.position)
635    }
636}
637
638impl<S, S0, BV: Deref<Target = [u64]>> FusedIterator for Cursor<'_, S, S0, BV> {}
639
640
641#[cfg(test)]
642mod tests {
643    use bitm::BinaryRankSearch;
644
645    use super::*;
646
647    fn test_read_write<S: SelectForRank101111, S0: Select0ForRank101111>(seq: Sequence<S, S0>) {
648        let mut buff = Vec::new();
649        seq.write(&mut buff).unwrap();
650        assert_eq!(buff.len(), seq.write_bytes());
651        let read = Sequence::<S, S0>::read_s(&mut &buff[..]).unwrap();
652        assert_eq!(seq.len, read.len);
653        assert_eq!(seq.hi.content, read.hi.content);
654        assert_eq!(seq.lo, read.lo);
655    }
656
657    #[test]
658    fn test_empty() {
659        let ef = Builder::new(0, 0).finish();
660        assert_eq!(ef.get(0), None);
661        assert_eq!(ef.rank(0), 0);
662        assert_eq!(ef.iter().collect::<Vec<_>>(), []);
663        assert_eq!(ef.iter().rev().collect::<Vec<_>>(), []);
664        test_read_write(ef);
665    }
666
667    #[test]
668    fn test_small_sparse() {
669        let mut ef = Builder::new(5, 1000);
670        ef.push(0);
671        ef.push(1);
672        ef.push(801);
673        ef.push(920);
674        ef.push(999);
675        let ef = ef.finish_s::<BinaryRankSearch, BinaryRankSearch>();
676        assert_eq!(ef.get(0), Some(0));
677        assert_eq!(ef.get(1), Some(1));
678        assert_eq!(ef.get(2), Some(801));
679        assert_eq!(ef.get(3), Some(920));
680        assert_eq!(ef.get(4), Some(999));
681        assert_eq!(ef.get(5), None);
682        assert_eq!(ef.iter().collect::<Vec<_>>(), [0, 1, 801, 920, 999]);
683        assert_eq!(ef.iter().rev().collect::<Vec<_>>(), [999, 920, 801, 1, 0]);
684        assert_eq!(ef.geq_cursor(801).collect::<Vec<_>>(), [801, 920, 999]);
685        assert_eq!(ef.geq_cursor(802).collect::<Vec<_>>(), [920, 999]);
686        assert_eq!(ef.rank(0), 0);
687        assert_eq!(ef.rank(1), 1);
688        assert_eq!(ef.rank(2), 2);
689        assert_eq!(ef.rank(800), 2);
690        assert_eq!(ef.rank(801), 2);
691        assert_eq!(ef.rank(802), 3);
692        assert_eq!(ef.rank(920), 3);
693        assert_eq!(ef.rank(921), 4);
694        assert_eq!(ef.rank(999), 4);
695        assert_eq!(ef.rank(1000), 5);
696        let mut c = ef.cursor_of(920).unwrap();
697        assert_eq!(c.index(), 3);
698        assert_eq!(c.value(), Some(920));
699        c.advance();
700        assert_eq!(c.index(), 4);
701        assert_eq!(c.value(), Some(999));
702        c.advance();
703        assert_eq!(c.index(), 5);
704        assert_eq!(c.value(), None);
705        test_read_write(ef);
706    }
707
708    #[test]
709    fn test_small_dense() {
710        let mut ef = Builder::new(5, 6);
711        ef.push(0);
712        ef.push(1);
713        ef.push(3);
714        ef.push(3);
715        ef.push(5);
716        let ef: Sequence = ef.finish();
717        assert_eq!(ef.get(0), Some(0));
718        assert_eq!(ef.get(1), Some(1));
719        assert_eq!(ef.get(2), Some(3));
720        assert_eq!(ef.get(3), Some(3));
721        assert_eq!(ef.get(4), Some(5));
722        assert_eq!(ef.get(5), None);
723        assert_eq!(ef.iter().collect::<Vec<_>>(), [0, 1, 3, 3, 5]);
724        assert_eq!(ef.geq_cursor(3).collect::<Vec<_>>(), [3, 3, 5]);
725        assert_eq!(ef.geq_cursor(10).collect::<Vec<_>>(), []);
726        assert_eq!(ef.iter().rev().collect::<Vec<_>>(), [5, 3, 3, 1, 0]);
727        assert_eq!(ef.rank(0), 0);
728        assert_eq!(ef.rank(1), 1);
729        assert_eq!(ef.rank(2), 2);
730        assert_eq!(ef.rank(3), 2);
731        assert_eq!(ef.rank(4), 4);
732        assert_eq!(ef.rank(5), 4);
733        assert_eq!(ef.rank(6), 5);
734        assert_eq!(ef.diff(0), Some(0));
735        assert_eq!(ef.diff(1), Some(1));
736        assert_eq!(ef.diff(2), Some(2));
737        assert_eq!(ef.diff(3), Some(0));
738        assert_eq!(ef.diff(4), Some(2));
739        assert_eq!(ef.diff(5), None);
740        assert_eq!(ef.diffs().collect::<Vec<_>>(), [0, 1, 2, 0, 2]);
741        assert_eq!(ef.geq_cursor(3).diffs().collect::<Vec<_>>(), [2, 0, 2]);
742        test_read_write(ef);
743    }
744
745    #[test]
746    fn test_mid_1() {
747        let mut ef = Builder::new(1<<16, 1<<16);
748        ef.push_all(0..1<<16);
749        let ef: Sequence = ef.finish();
750        for i in (1..1<<16).step_by(33) {
751            assert_eq!(ef.get(i), Some(i as u64));
752            assert_eq!(ef.diff(i), Some(1));
753            assert_eq!(ef.index_of(i as u64), Some(i));
754            assert_eq!(ef.geq_index(i as u64), i);
755        }
756        test_read_write(ef);
757    }
758
759    #[test]
760    fn test_mid_3() {
761        let mut ef = Builder::new(1<<16, (1<<16)*3 + 1);
762        ef.push_all((1..=1<<16).map(|v|v*3));   //1, 3, 6, ...
763        let ef: Sequence = ef.finish();
764        for i in (1usize..1<<16).step_by(33) {
765            let value = (i as u64 + 1) * 3;
766            assert_eq!(ef.get(i), Some(value), "get({}) should be {}", i, value);
767            assert_eq!(ef.diff(i), Some(3));
768            assert_eq!(ef.index_of(value), Some(i));
769            assert_eq!(ef.geq_index(value), i);
770        }
771        test_read_write(ef);
772    }
773
774    #[cfg(target_pointer_width = "64")]
775    #[test]
776    #[ignore = "uses much memory and time"]
777    fn test_huge() {
778        let mut ef = Builder::new(1<<33, (1<<33)/2 + 1);
779        ef.push_all((1..=1<<33).map(|v|v/2));
780        let ef: Sequence = ef.finish();
781        let mut prev_value = 0;
782        for i in (1<<33)-2050..1<<33 {
783            let value = (i as u64 + 1) / 2;
784            assert_eq!(ef.get(i), Some(value));
785            if prev_value != 0 {
786                if value != prev_value {
787                    assert_eq!(ef.index_of(value), Some(i));
788                    assert_eq!(ef.geq_index(value), i);
789                }
790                assert_eq!(ef.diff(i), Some(value-prev_value));
791            }
792            prev_value = value;
793        }
794        test_read_write(ef);
795    }
796}