// flatk/chunked/clumped_offsets.rs

1//! This module defines the `ClumpedOffsets` type and its behaviour.
2
3use super::*;
4use std::convert::{AsMut, AsRef};
5use std::ops::Range;
6
7#[cfg(feature = "rayon")]
8pub(crate) mod par_iter;
9
10/// A collection of clumped offsets into another collection.
11///
12/// A version of `Offsets` that combines consecutive equidistant offsets.
13///
14/// # Example
15///
16/// To see the correspondence between `Offsets` and `ClumpedOffsets`, consider the following
17/// example
18///
19/// ```
20///     use flatk::{Offsets, ClumpedOffsets};
21///
22///     // with `Offsets` we might have offsets like
23///     let off = Offsets::new(vec![0, 3, 6, 9, 12, 16, 20, 24, 27, 30, 33, 36, 39]);
24///     // which can be represented in `ClumpedOffsets` as
25///     let clumped_off = ClumpedOffsets::new(vec![0, 4, 7, 12], vec![0, 12, 24, 39]);
26///     // Note that ClumpedOffsets would be more compact if the triplets could be combined,
27///     // which would require reorganizing the data indexed by the offsets.
28///
29///     assert_eq!(ClumpedOffsets::from(off), clumped_off);
30/// ```
31///
32/// `ClumpedOffsets` can be a lot more memory efficient when a chunked collection is mostly
33/// uniformly chunked. However, in the worst case, when each consecutive stride is different, this
34/// representation can be three times the size of standard `Offsets`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct ClumpedOffsets<O = Vec<usize>> {
    // Positions of clump boundaries in the conceptual unclumped offset sequence.
    pub chunk_offsets: Offsets<O>,
    // Offset values at the corresponding clump boundaries; same length as `chunk_offsets`.
    pub offsets: Offsets<O>,
}
40
impl<O: AsRef<[usize]>> Set for ClumpedOffsets<O> {
    type Elem = usize;
    type Atom = usize;

    /// The length of this set is the total number of unclumped offsets represented.
    #[inline]
    fn len(&self) -> usize {
        self.num_offsets()
    }
}
50
impl<O: AsRef<[usize]>> ClumpedOffsets<O> {
    /// Get the offset value in the clumped offsets collection at the given index of the conceptual
    /// unclumped offset collection.
    ///
    /// This function returns the offset value followed by the index of the clump it belongs to in
    /// the clumped collection.
    ///
    /// # Safety
    ///
    /// It is assumed that `index` is strictly less than `self.num_offsets()`.
    #[inline]
    unsafe fn offset_value_and_clump_index_unchecked(&self, index: usize) -> (usize, usize) {
        // Discard the chunk offset value and the clump-boundary flag.
        let ((_, off), clump_idx, _) = self.get_chunk_at_unchecked(index);
        (off, clump_idx)
    }

    /// Get the offset info in the clumped offsets collection located at the given
    /// index of the conceptual unclumped offset collection.
    ///
    /// This function returns the `(chunk_offset_value, offset_value)` pair followed by the index
    /// of the clump it belongs to in the clumped collection. The last boolean value indicates
    /// whether the offset is explicitly represented in the clumped offsets, i.e. it lies on the
    /// boundary of a clump.
    ///
    /// # Safety
    ///
    /// It is assumed that `index` is strictly less than `self.num_offsets()`.
    #[inline]
    unsafe fn get_chunk_at_unchecked(&self, index: usize) -> ((usize, usize), usize, bool) {
        let ClumpedOffsets {
            chunk_offsets,
            offsets,
        } = self;
        debug_assert!(chunk_offsets.num_offsets() > 0);
        debug_assert!(offsets.num_offsets() > 0);
        // The following is safe by construction of ClumpedOffsets.
        match chunk_offsets.binary_search(&index) {
            // The index lands exactly on a clump boundary: both values are stored explicitly.
            Ok(clump_idx) => (
                (
                    chunk_offsets.offset_value_unchecked(clump_idx),
                    offsets.offset_value_unchecked(clump_idx),
                ),
                clump_idx,
                true,
            ),
            Err(clump_idx) => {
                // Offset is in the middle of a clump.
                // Given that idx is not out of bounds as defined in the doc,
                // the following are safe because index >= 0.
                // so clump_idx >= 0.
                let begin_off = offsets.offset_value_unchecked(clump_idx - 1);
                let clump_dist = offsets.offset_value_unchecked(clump_idx) - begin_off;
                let begin_clump_off = chunk_offsets.offset_value_unchecked(clump_idx - 1);
                let clump_size = chunk_offsets.offset_value_unchecked(clump_idx) - begin_clump_off;
                // All chunks within a clump are equidistant, so the stride is uniform.
                let stride = clump_dist / clump_size;
                // Translate the relative index to an absolute chunk offset value.
                let chunk_offset = index + chunk_offsets.first_offset_value();
                // Interpolate the offset value within the clump.
                let offset = begin_off + stride * (chunk_offset - begin_clump_off);
                ((chunk_offset, offset), clump_idx - 1, false)
            }
        }
    }

    /// The first chunk offset value, i.e. the position of the first clump boundary.
    #[inline]
    pub fn first_chunk_offset_value(&self) -> usize {
        self.chunk_offsets.first_offset_value()
    }
    /// The last chunk offset value, i.e. the position of the last clump boundary.
    #[inline]
    pub fn last_chunk_offset_value(&self) -> usize {
        self.chunk_offsets.last_offset_value()
    }

    /// Returns the number of clump offsets represented by `ClumpedOffsets`.
    ///
    /// This is typically significantly smaller than `self.num_offsets()`.
    #[inline]
    pub fn num_clump_offsets(&self) -> usize {
        self.offsets.num_offsets()
    }

    /// Returns the number of clumps represented by `ClumpedOffsets`.
    #[inline]
    pub fn num_clumps(&self) -> usize {
        self.offsets.num_offsets() - 1
    }

    /// Compute the stride of the clump at the given index.
    ///
    /// # Panics
    ///
    /// This function panics if `index` is greater than or equal to `self.num_clumps()`.
    #[inline]
    pub fn clump_stride(&self, index: usize) -> usize {
        assert!(index < self.num_clumps(), "Offset index out of bounds");

        // SAFETY: The length is checked above.
        unsafe {
            // Total unclumped span of the clump divided by its number of chunks.
            self.offsets.chunk_len_unchecked(index) / self.chunk_offsets.chunk_len_unchecked(index)
        }
    }
}
152
153// SAFETY: offsets and chunk_offsets are guaranteed to store at least one offset at all times.
// SAFETY: offsets and chunk_offsets are guaranteed to store at least one offset at all times.
unsafe impl<O: AsRef<[usize]>> GetOffset for ClumpedOffsets<O> {
    /// A version of `offset_value` without bounds checking.
    ///
    /// # Safety
    ///
    /// It is assumed that `index` is strictly less than `self.num_offsets()`.
    #[inline]
    unsafe fn offset_value_unchecked(&self, index: usize) -> usize {
        // Only the offset value is needed; the clump index is dropped.
        self.offset_value_and_clump_index_unchecked(index).0
    }

    /// Get the total number of offsets.
    ///
    /// This is one more than the number of chunks represented.
    fn num_offsets(&self) -> usize {
        let offsets = self.chunk_offsets.as_ref();
        debug_assert!(!offsets.is_empty(), "Clump offsets are corrupted");
        // SAFETY: non-emptiness is asserted above. The span between the first and last
        // chunk offsets counts the unclumped chunks; add one for the final offset.
        unsafe { 1 + offsets.get_unchecked(offsets.len() - 1) - offsets.get_unchecked(0) }
    }
}
174
175impl<O: AsRef<[usize]>> ClumpedOffsets<O> {
176    /// An iterator over effective (unclumped) sizes producing an increment for advancing the
177    /// data pointer.
178    ///
179    /// This helps for implementing iterators over `Chunked` types.
180    ///
181    /// # Example
182    ///
183    /// ```
184    ///     use flatk::{Offsets, ClumpedOffsets};
185    ///
186    ///     let off = Offsets::new(vec![0, 3, 6, 9, 12, 16, 20, 24, 27, 30, 33, 36, 39]);
187    ///     let clumped = ClumpedOffsets::from(off);
188    ///     let mut clumped_iter = clumped.sizes();
189    ///     for _ in 0..4 {
190    ///         assert_eq!(clumped_iter.next(), Some(3));
191    ///     }
192    ///     for _ in 0..3 {
193    ///         assert_eq!(clumped_iter.next(), Some(4));
194    ///     }
195    ///     for _ in 0..5 {
196    ///         assert_eq!(clumped_iter.next(), Some(3));
197    ///     }
198    ///     assert_eq!(clumped_iter.next(), None);
199    /// ```
200    #[inline]
201    pub fn sizes(&self) -> UnclumpedSizes {
202        debug_assert!(self.chunk_offsets.num_offsets() > 0);
203        UnclumpedSizes {
204            iter: UnclumpedOffsetValuesAndSizes {
205                stride: 0,
206                cur: self.offsets.first_offset_value(),
207                clumped_offsets: self.view(),
208            },
209        }
210    }
211
212    /// An iterator over unclumped offsets.
213    ///
214    /// This is equivalent to iterating over `Offsets` after conversion, but it doesn't require any
215    /// additional allocations.
216    #[inline]
217    pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
218        debug_assert!(!self.offsets.as_ref().is_empty());
219        let first = self.first_offset_value();
220        UnclumpedOffsetValues {
221            cur_stride: 0,
222            cur_offset: self.offsets.first_offset_value(),
223            chunk_offsets: self.chunk_offsets.as_ref(),
224            offsets: self.offsets.as_ref(),
225        }
226        .map(move |x| x - first)
227    }
228
229    /// An iterator over unclumped offsets.
230    ///
231    /// This is equivalent to iterating over `Offsets` after conversion, but it doesn't require any
232    /// additional allocations.
233    #[inline]
234    pub fn values(&self) -> UnclumpedOffsetValues {
235        debug_assert!(!self.offsets.as_ref().is_empty());
236        UnclumpedOffsetValues {
237            cur_stride: 0,
238            cur_offset: self.offsets.first_offset_value(),
239            chunk_offsets: self.chunk_offsets.as_ref(),
240            offsets: self.offsets.as_ref(),
241        }
242    }
243}
244
/// Iterator over offset value and size pairs representing unclumped chunks.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct UnclumpedOffsetValuesAndSizes<'a> {
    /// Current chunk size (stride of the clump being traversed; zero before the first step).
    stride: usize,
    /// Current unclumped offset value; the next value to be produced.
    cur: usize,
    /// Remaining clumped offsets; consumed clump boundaries are popped from the front.
    clumped_offsets: ClumpedOffsets<&'a [usize]>,
}
254
255impl<'a> Iterator for UnclumpedOffsetValuesAndSizes<'a> {
256    type Item = (usize, usize);
257    #[inline]
258    fn next(&mut self) -> Option<Self::Item> {
259        let UnclumpedOffsetValuesAndSizes {
260            stride,
261            cur,
262            clumped_offsets: co,
263        } = self;
264
265        let n = co.offsets.num_offsets();
266        // offsets should never be completely consumed.
267        debug_assert!(n > 0);
268
269        let offset = co.offsets.first_offset_value();
270
271        if *cur == offset {
272            // We reached the end.
273            if n < 2 {
274                return None;
275            }
276
277            let chunk_offset = co.chunk_offsets.first_offset_value();
278
279            // SAFETY: We know there are at least two elements in both offsets and chunk_offsets.
280            unsafe {
281                // Pop the last internal offset, which also potentially changes the stride.
282                co.offsets.remove_prefix_unchecked(1);
283                co.chunk_offsets.remove_prefix_unchecked(1);
284            }
285
286            let next_offset = co.offsets.first_offset_value();
287            let next_chunk_offset = co.chunk_offsets.first_offset_value();
288
289            // Recompute the stride.
290            let clump_dist = next_offset - offset;
291            let chunk_size = next_chunk_offset - chunk_offset;
292            *stride = clump_dist / chunk_size;
293        }
294
295        let off = *cur;
296
297        // Incrementing the head offset effectively pops the unclumped offset.
298        *cur += *stride;
299
300        // The current stride is equal to the next effective unclumped size
301        Some((off, *stride))
302    }
303
304    #[inline]
305    fn size_hint(&self) -> (usize, Option<usize>) {
306        let n = self.clumped_offsets.num_offsets() - 1;
307        (n, Some(n))
308    }
309
310    #[inline]
311    fn count(self) -> usize {
312        self.size_hint().0
313    }
314
315    #[inline]
316    fn nth(&mut self, n: usize) -> Option<Self::Item> {
317        let UnclumpedOffsetValuesAndSizes {
318            stride,
319            cur,
320            clumped_offsets: co,
321        } = self;
322
323        let first_offset = co.offsets.first_offset_value();
324
325        let adjusted_n = if *stride > 0 {
326            let first_clump_count = (first_offset - *cur) / *stride;
327            if first_clump_count > n {
328                // The offset is in the first clump
329                *cur += *stride * (n + 1);
330                return Some((*cur, *stride));
331            } else {
332                n - first_clump_count
333            }
334        } else {
335            n
336        };
337
338        if adjusted_n + 1 < co.num_offsets() {
339            // Binary search for offset value.
340            // SAFETY: This is safe since the bounds are checked above.
341            let (cur_off, clump_idx) =
342                unsafe { co.offset_value_and_clump_index_unchecked(adjusted_n) };
343
344            // SAFETY: There are at least clump_idx+2 elements in offsets and chunk_offsets.
345            let (offset, chunk_offset, next_offset, next_chunk_offset) = unsafe {
346                (
347                    co.offsets.offset_value_unchecked(clump_idx),
348                    co.chunk_offsets.offset_value_unchecked(clump_idx),
349                    co.offsets.offset_value_unchecked(clump_idx + 1),
350                    co.chunk_offsets.offset_value_unchecked(clump_idx + 1),
351                )
352            };
353
354            // Recompute the stride.
355            let clump_dist = next_offset - offset;
356            let chunk_size = next_chunk_offset - chunk_offset;
357            *stride = clump_dist / chunk_size;
358
359            // SAFETY: There are at least clump_idx+2 elements in offsets and chunk_offsets.
360            // This is because offset_value_and_clump_index_unchecked will never return the last offset.
361            unsafe {
362                // Pop the last internal offset (if any).
363                co.offsets.remove_prefix_unchecked(clump_idx + 1);
364                co.chunk_offsets.remove_prefix_unchecked(clump_idx + 1);
365            };
366
367            // Incrementing the head offset effectively pops the unclumped offset.
368            *cur = cur_off + *stride;
369
370            Some((cur_off, *stride))
371        } else {
372            None
373        }
374    }
375}
376
377// TODO: Extend UnclumpedOffsetValuesAndSizes to be double ended or implement a reverse iterator.
378
/// Iterator over offsets and size pairs representing unclumped chunks.
///
/// Offsets produced by this iterator are rebased to begin at zero.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct UnclumpedOffsetsAndSizes<'a> {
    /// The first raw offset value; subtracted from every produced offset.
    first_offset_value: usize,
    /// Underlying iterator over raw offset values and sizes.
    iter: UnclumpedOffsetValuesAndSizes<'a>,
}
385
386impl UnclumpedOffsetsAndSizes<'_> {
387    #[inline]
388    fn offset_and_size_mapper(&self) -> impl Fn((usize, usize)) -> (usize, usize) + '_ {
389        move |(off, size)| (off - self.first_offset_value, size)
390    }
391}
392
393impl<'a> Iterator for UnclumpedOffsetsAndSizes<'a> {
394    type Item = (usize, usize);
395    #[inline]
396    fn next(&mut self) -> Option<Self::Item> {
397        self.iter.next().map(self.offset_and_size_mapper())
398    }
399
400    #[inline]
401    fn size_hint(&self) -> (usize, Option<usize>) {
402        self.iter.size_hint()
403    }
404
405    #[inline]
406    fn count(self) -> usize {
407        self.iter.count()
408    }
409
410    #[inline]
411    fn nth(&mut self, n: usize) -> Option<Self::Item> {
412        self.iter.nth(n).map(self.offset_and_size_mapper())
413    }
414}
415
impl ExactSizeIterator for UnclumpedOffsetsAndSizes<'_> {}
impl std::iter::FusedIterator for UnclumpedOffsetsAndSizes<'_> {}

// NOTE(review): ExactSizeIterator requires `size_hint` to remain exact during
// iteration; verify it accounts for elements consumed mid-clump.
impl ExactSizeIterator for UnclumpedOffsetValuesAndSizes<'_> {}
impl std::iter::FusedIterator for UnclumpedOffsetValuesAndSizes<'_> {}
421
/// Iterator over unclumped chunk sizes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct UnclumpedSizes<'a> {
    /// Underlying iterator whose `(offset, size)` pairs are reduced to sizes only.
    iter: UnclumpedOffsetValuesAndSizes<'a>,
}
427
428impl<'a> Iterator for UnclumpedSizes<'a> {
429    type Item = usize;
430    #[inline]
431    fn next(&mut self) -> Option<Self::Item> {
432        self.iter.next().map(|(_, size)| size)
433    }
434
435    #[inline]
436    fn size_hint(&self) -> (usize, Option<usize>) {
437        self.iter.size_hint()
438    }
439
440    #[inline]
441    fn count(self) -> usize {
442        self.iter.count()
443    }
444
445    #[inline]
446    fn nth(&mut self, n: usize) -> Option<Self::Item> {
447        self.iter.nth(n).map(|(_, size)| size)
448    }
449}
450
// `UnclumpedSizes` merely forwards to the inner iterator, inheriting its length
// reporting and fused behaviour.
impl ExactSizeIterator for UnclumpedSizes<'_> {}
impl std::iter::FusedIterator for UnclumpedSizes<'_> {}
453
454impl<'a> IntoValues for ClumpedOffsets<&'a [usize]> {
455    type Iter = UnclumpedOffsetValues<'a>;
456
457    /// Returns an iterator over offset values represented by the stored `Offsets`.
458    #[inline]
459    fn into_values(self) -> UnclumpedOffsetValues<'a> {
460        debug_assert!(self.chunk_offsets.num_offsets() > 0);
461        UnclumpedOffsetValues {
462            cur_stride: 0,
463            cur_offset: self.offsets.first_offset_value(),
464            chunk_offsets: self.chunk_offsets.into_inner(),
465            offsets: self.offsets.into_inner(),
466        }
467    }
468}
469
470impl<'a> IntoSizes for ClumpedOffsets<&'a [usize]> {
471    type Iter = UnclumpedSizes<'a>;
472
473    /// Returns an iterator over chunk sizes represented by the stored `ClumpedOffsets`.
474    #[inline]
475    fn into_sizes(self) -> UnclumpedSizes<'a> {
476        debug_assert!(self.chunk_offsets.num_offsets() > 0);
477        UnclumpedSizes {
478            iter: UnclumpedOffsetValuesAndSizes {
479                stride: 0,
480                cur: self.offsets.first_offset_value(),
481                clumped_offsets: self,
482            },
483        }
484    }
485}
486
487impl<'a> IntoOffsetValuesAndSizes for ClumpedOffsets<&'a [usize]> {
488    type Iter = UnclumpedOffsetValuesAndSizes<'a>;
489
490    #[inline]
491    fn into_offset_values_and_sizes(self) -> UnclumpedOffsetValuesAndSizes<'a> {
492        debug_assert!(self.chunk_offsets.num_offsets() > 0);
493        UnclumpedOffsetValuesAndSizes {
494            stride: 0,
495            cur: self.offsets.first_offset_value(),
496            clumped_offsets: self,
497        }
498    }
499}
500
/// Iterator over raw unclumped offset values.
//
// Derive `Debug` and `PartialEq` for consistency with the sibling iterator
// types in this module (`UnclumpedOffsetValuesAndSizes`, `UnclumpedSizes`,
// `UnclumpedOffsetsAndSizes`), which all carry these derives.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct UnclumpedOffsetValues<'a> {
    /// Stride of the clump currently being traversed (zero before the first step).
    cur_stride: usize,
    /// The next offset value to be produced.
    cur_offset: usize,
    /// Remaining clump boundary positions; popped from the front as boundaries are consumed.
    chunk_offsets: &'a [usize],
    /// Remaining clump boundary offset values; kept in lockstep with `chunk_offsets`.
    offsets: &'a [usize],
}
508
509impl<'a> Iterator for UnclumpedOffsetValues<'a> {
510    type Item = usize;
511
512    #[inline]
513    fn next(&mut self) -> Option<usize> {
514        let UnclumpedOffsetValues {
515            cur_stride,
516            cur_offset,
517            chunk_offsets,
518            offsets,
519        } = self;
520
521        if chunk_offsets.is_empty() {
522            return None;
523        }
524
525        debug_assert_eq!(chunk_offsets.len(), offsets.len());
526
527        // SAFETY: chunk_offsets length is checked above, and offsets has the same length.
528        let offset = unsafe { *offsets.get_unchecked(0) };
529
530        if *cur_offset == offset {
531            // SAFETY: chunk_offsets length is checked above, and offsets has the same length.
532            let chunk_offset = unsafe { *chunk_offsets.get_unchecked(0) };
533
534            // SAFETY: There is at least one element in both offsets and chunk_offsets.
535            unsafe {
536                // Pop the last internal offset.
537                self.offsets = offsets.get_unchecked(1..);
538                self.chunk_offsets = chunk_offsets.get_unchecked(1..);
539            }
540
541            if let Some(next_offset) = self.offsets.get(0) {
542                // The following is safe since clump_offsets is expected to have the same size as
543                // offsets (debug_assert above checking this invariant).
544                let next_chunk_offset = unsafe { self.chunk_offsets.get_unchecked(0) };
545
546                // Recompute the new stride.
547                let clump_dist = next_offset - offset;
548                let chunk_size = next_chunk_offset - chunk_offset;
549                *cur_stride = clump_dist / chunk_size;
550                *cur_offset += *cur_stride;
551            }
552            Some(offset)
553        } else {
554            let result = Some(*cur_offset);
555            *cur_offset += *cur_stride;
556            result
557        }
558    }
559
560    #[inline]
561    fn size_hint(&self) -> (usize, Option<usize>) {
562        let n = self
563            .chunk_offsets
564            .last()
565            .map(|last| {
566                // SAFETY: there is a last, so there must be a first.
567                last - unsafe { *self.chunk_offsets.get_unchecked(0) } + 1
568            })
569            .unwrap_or(0);
570        (n, Some(n))
571    }
572
573    #[inline]
574    fn count(self) -> usize {
575        self.size_hint().0
576    }
577}
578
// NOTE(review): ExactSizeIterator requires `size_hint` to remain exact during
// iteration; verify it accounts for offsets consumed mid-clump.
impl ExactSizeIterator for UnclumpedOffsetValues<'_> {}
impl std::iter::FusedIterator for UnclumpedOffsetValues<'_> {}
581
582impl<'a> SplitOffsetsAt for ClumpedOffsets<&'a [usize]> {
583    /// Same as `split_offsets_at`, but in addition, return the offset of the middle element
584    /// (intersection): this is the value `offsets[mid] - offsets[0]`.
585    ///
586    /// # Panics
587    ///
588    /// Calling this function with an empty slice or with `mid` greater than or equal to its length
589    /// will cause a panic.
590    ///
591    /// This function will also panic when trying to split a clump since the offsets cannot be
592    /// modified or created.
593    #[inline]
594    fn split_offsets_with_intersection_at(
595        self,
596        mid_off: usize,
597    ) -> (
598        ClumpedOffsets<&'a [usize]>,
599        ClumpedOffsets<&'a [usize]>,
600        usize,
601    ) {
602        assert!(mid_off < self.num_offsets());
603        // Try to find the mid in our chunk offsets.
604        // Note that it is the responsibility of the container to ensure that the split is valid.
605        let mid_idx = self
606            .chunk_offsets
607            .binary_search(&mid_off)
608            .expect("Cannot split clumped offsets through a clump");
609        let (los, ros, off) = self.offsets.split_offsets_with_intersection_at(mid_idx);
610        let (lcos, rcos) = self.chunk_offsets.split_offsets_at(mid_idx);
611        (
612            ClumpedOffsets {
613                chunk_offsets: lcos,
614                offsets: los,
615            },
616            ClumpedOffsets {
617                chunk_offsets: rcos,
618                offsets: ros,
619            },
620            off,
621        )
622    }
623
624    /// Splits clumped offsets at the given index into two such that each
625    /// resulting `ClumpedOffsets` is valid. This means that the element at index
626    /// `mid` is shared between the two outputs.
627    ///
628    /// # Panics
629    ///
630    /// Calling this function with an empty slice or with `mid` greater than or equal to its length
631    /// will cause a panic.
632    ///
633    /// This function will also panic when trying to split a clump since the offsets cannot be
634    /// modified or created.
635    #[inline]
636    fn split_offsets_at(
637        self,
638        mid: usize,
639    ) -> (ClumpedOffsets<&'a [usize]>, ClumpedOffsets<&'a [usize]>) {
640        assert!(mid < self.num_offsets());
641        // Try to find the mid in our chunk offsets.
642        // Note that it is the responsibility of the container to ensure that the split is valid.
643        let mid_idx = self
644            .chunk_offsets
645            .binary_search(&mid)
646            .expect("Cannot split clumped offsets through a clump");
647        let (los, ros) = self.offsets.split_offsets_at(mid_idx);
648        let (lcos, rcos) = self.chunk_offsets.split_offsets_at(mid_idx);
649        (
650            ClumpedOffsets {
651                chunk_offsets: lcos,
652                offsets: los,
653            },
654            ClumpedOffsets {
655                chunk_offsets: rcos,
656                offsets: ros,
657            },
658        )
659    }
660}
661
impl<O> IndexRange for ClumpedOffsets<O>
where
    Self: GetOffset,
{
    /// A version of `index_range` without bounds checking.
    ///
    /// # Safety
    ///
    /// It is assumed that `range.end` is strictly less than `self.num_offsets()`.
    #[inline]
    unsafe fn index_range_unchecked(&self, range: Range<usize>) -> Range<usize> {
        let begin = self.offset_unchecked(range.start);
        let end = self.offset_unchecked(range.end);
        begin..end
    }
    /// Return the `[begin..end)` bound of the chunk at the given index.
    #[inline]
    fn index_range(&self, range: Range<usize>) -> Option<Range<usize>> {
        if range.end < self.num_offsets() {
            // SAFETY: checked the bound above.
            unsafe { Some(self.index_range_unchecked(range)) }
        } else {
            None
        }
    }
}
683
684impl std::iter::FromIterator<usize> for ClumpedOffsets {
685    #[inline]
686    fn from_iter<T>(iter: T) -> Self
687    where
688        T: IntoIterator<Item = usize>,
689    {
690        let mut offset_iter = iter.into_iter();
691        // Start off with a more realistic capacity, this saves a few almost certain allocations.
692        let mut chunk_offsets = Vec::with_capacity(8);
693        let mut offsets = Vec::with_capacity(16);
694        chunk_offsets.push(0);
695
696        if let Some(first) = offset_iter.next() {
697            offsets.push(first);
698            if let Some(mut prev_offset) = offset_iter.next() {
699                let mut prev_stride = prev_offset - first;
700                let mut next_chunk_offset = 1;
701                for offset in offset_iter {
702                    let stride = offset - prev_offset;
703                    if prev_stride != stride {
704                        offsets.push(prev_offset);
705                        chunk_offsets.push(next_chunk_offset);
706                    }
707                    next_chunk_offset += 1;
708                    prev_stride = stride;
709                    prev_offset = offset;
710                }
711                offsets.push(prev_offset);
712                chunk_offsets.push(next_chunk_offset);
713            }
714        } else {
715            offsets.push(0);
716        }
717        // SAFETY: chunk_offsets and offsets are guaranteed to be non-empty.
718        // TODO: Abstract the incremental construction of offsets into `Offsets` to avoid having to
719        // use unsafe here.
720        unsafe {
721            ClumpedOffsets {
722                chunk_offsets: Offsets::from_raw(chunk_offsets),
723                offsets: Offsets::from_raw(offsets),
724            }
725        }
726    }
727}
728
729//impl<O: AsRef<[usize]> + Set> Set for ClumpedOffsets<O> {
730//    type Elem = O::Elem;
731//    type Atom = O::Atom;
732//    #[inline]
733//    fn len(&self) -> usize {
734//        let offsets = self.chunk_offsets.as_ref();
735//        assert!(!offsets.is_empty(), "Clump offsets are corrupted");
736//        unsafe { 1 + offsets.get_unchecked(offsets.len() - 1) - offsets.get_unchecked(0) }
737//    }
738//}
739
// Marker: a `ClumpedOffsets` over viewed storage is itself viewed.
impl<O: Viewed> Viewed for ClumpedOffsets<O> {}
741
742impl<'a, O: AsRef<[usize]>> View<'a> for ClumpedOffsets<O> {
743    type Type = ClumpedOffsets<&'a [usize]>;
744    #[inline]
745    fn view(&'a self) -> Self::Type {
746        ClumpedOffsets {
747            chunk_offsets: self.chunk_offsets.view(),
748            offsets: self.offsets.view(),
749        }
750    }
751}
752
753impl<'a, O: AsMut<[usize]>> ViewMut<'a> for ClumpedOffsets<O> {
754    type Type = ClumpedOffsets<&'a mut [usize]>;
755    #[inline]
756    fn view_mut(&'a mut self) -> Self::Type {
757        ClumpedOffsets {
758            chunk_offsets: self.chunk_offsets.view_mut(),
759            offsets: self.offsets.view_mut(),
760        }
761    }
762}
763
impl<O: AsRef<[usize]> + Set> From<Offsets<O>> for ClumpedOffsets {
    /// Convert `Offsets` to owned `ClumpedOffsets`.
    ///
    /// This function causes allocations.
    ///
    /// # Example
    ///
    /// ```
    /// use flatk::{Offsets, ClumpedOffsets};
    /// let offsets = Offsets::new(vec![0, 3, 6, 9, 12, 16, 20, 24, 27, 30, 33, 36, 39]);
    /// let clumped_offsets = ClumpedOffsets::new(vec![0, 4, 7, 12], vec![0, 12, 24, 39]);
    /// assert_eq!(ClumpedOffsets::from(offsets), clumped_offsets);
    /// ```
    #[inline]
    fn from(offsets: Offsets<O>) -> Self {
        debug_assert!(offsets.num_offsets() > 0, "Offsets are corrupted.");
        // The clumping itself is performed by the `FromIterator` implementation.
        offsets.values().collect()
    }
}
783
784impl<O: AsRef<[usize]> + Set> From<ClumpedOffsets<O>> for Offsets {
785    /// Convert `ClumpedOffsets` into owned `Offsets`.
786    ///
787    /// This function causes allocations.
788    #[inline]
789    fn from(clumped_offsets: ClumpedOffsets<O>) -> Self {
790        debug_assert!(clumped_offsets.num_offsets() > 0, "Offsets are corrupted.");
791        Offsets::new(clumped_offsets.values().collect())
792    }
793}
794
impl<O: AsRef<[usize]>> ClumpedOffsets<O> {
    /// Construct new `ClumpedOffsets` from a given valid set of clumped offsets represented as an
    /// array of `usize`s.
    ///
    /// It is possible to create an invalid `ClumpedOffsets` struct using this constructor, however
    /// it is safe.
    ///
    /// # Panics
    ///
    /// This function will panic if either `chunk_offsets` or `offsets` is empty.
    #[inline]
    pub fn new(chunk_offsets: O, offsets: O) -> Self {
        ClumpedOffsets {
            // `Offsets::new` performs the non-emptiness check for each collection.
            chunk_offsets: Offsets::new(chunk_offsets),
            offsets: Offsets::new(offsets),
        }
    }

    /// An unchecked version of the `new` constructor.
    ///
    /// # Safety
    ///
    /// Calling this function with empty `chunk_offsets` or `offsets` may cause undefined behaviour
    /// in safe APIs.
    #[inline]
    pub unsafe fn from_raw(chunk_offsets: O, offsets: O) -> Self {
        ClumpedOffsets {
            chunk_offsets: Offsets::from_raw(chunk_offsets),
            offsets: Offsets::from_raw(offsets),
        }
    }
}
827
828/// A default set of offsets must allocate.
829impl Default for ClumpedOffsets<Vec<usize>> {
830    #[inline]
831    fn default() -> Self {
832        ClumpedOffsets {
833            chunk_offsets: Default::default(),
834            offsets: Default::default(),
835        }
836    }
837}
838
/// A dummy set of offsets will not allocate, but the resulting type does not correspond to valid
/// offsets.
impl<O: Dummy> Dummy for ClumpedOffsets<O> {
    /// This function creates an invalid `ClumpedOffsets`, but it does not allocate.
    ///
    /// # Safety
    ///
    /// The result must not be used as valid offsets by safe APIs.
    #[inline]
    unsafe fn dummy() -> Self {
        ClumpedOffsets {
            chunk_offsets: Dummy::dummy(),
            offsets: Dummy::dummy(),
        }
    }
}
851
impl Clear for ClumpedOffsets {
    #[inline]
    fn clear(&mut self) {
        // Keep one offset in each collection so the "at least one offset"
        // invariant is preserved; the result represents zero chunks.
        self.chunk_offsets.truncate(1);
        self.offsets.truncate(1);
    }
}
859
860impl<'a, O> GetIndex<'a, ClumpedOffsets<O>> for usize
861where
862    ClumpedOffsets<O>: GetOffset,
863{
864    type Output = usize;
865    #[inline]
866    fn get(self, clumped_offsets: &ClumpedOffsets<O>) -> Option<Self::Output> {
867        if self < clumped_offsets.num_offsets() {
868            // The following is safe because we checked the bound above.
869            unsafe { Some(clumped_offsets.offset_unchecked(self)) }
870        } else {
871            None
872        }
873    }
874}
875
876impl<'a, O> GetIndex<'a, ClumpedOffsets<O>> for &usize
877where
878    ClumpedOffsets<O>: GetOffset,
879{
880    type Output = usize;
881    #[inline]
882    fn get(self, clumped_offsets: &ClumpedOffsets<O>) -> Option<Self::Output> {
883        GetIndex::get(*self, clumped_offsets)
884    }
885}
886
887impl<O> IsolateIndex<ClumpedOffsets<O>> for usize
888where
889    ClumpedOffsets<O>: GetOffset,
890{
891    type Output = usize;
892    #[inline]
893    unsafe fn isolate_unchecked(self, clumped_offsets: ClumpedOffsets<O>) -> Self::Output {
894        clumped_offsets.offset_unchecked(self)
895    }
896    #[inline]
897    fn try_isolate(self, clumped_offsets: ClumpedOffsets<O>) -> Option<Self::Output> {
898        if self < clumped_offsets.num_offsets() {
899            // The following is safe because we checked the bound above.
900            unsafe { Some(IsolateIndex::isolate_unchecked(self, clumped_offsets)) }
901        } else {
902            None
903        }
904    }
905}
906
907impl<O: Truncate> Truncate for ClumpedOffsets<O> {
908    #[inline]
909    fn truncate(&mut self, new_len: usize) {
910        self.chunk_offsets.truncate(new_len);
911        self.offsets.truncate(new_len);
912    }
913}
914
915impl<O: RemovePrefix + Set> RemovePrefix for ClumpedOffsets<O> {
916    #[inline]
917    fn remove_prefix(&mut self, n: usize) {
918        self.chunk_offsets.remove_prefix(n);
919        self.offsets.remove_prefix(n);
920    }
921}
922
923impl<O: IntoOwned> IntoOwned for ClumpedOffsets<O> {
924    type Owned = ClumpedOffsets<O::Owned>;
925    #[inline]
926    fn into_owned(self) -> Self::Owned {
927        ClumpedOffsets {
928            chunk_offsets: self.chunk_offsets.into_owned(),
929            offsets: self.offsets.into_owned(),
930        }
931    }
932}
933
934impl<O: Reserve> Reserve for ClumpedOffsets<O> {
935    #[inline]
936    fn reserve_with_storage(&mut self, n: usize, storage_n: usize) {
937        self.chunk_offsets.reserve_with_storage(n, storage_n);
938        self.offsets.reserve_with_storage(n, storage_n);
939    }
940}
941
impl std::iter::Extend<(usize, usize)> for ClumpedOffsets {
    /// Extend this set of clumped offsets with a given iterator over chunk-offset and offset pairs.
    ///
    /// This operation automatically shifts the merged offsets in the iterator
    /// to start from the last offset in `self`.
    ///
    /// Note that there will be 1 less offset added to `self` than produced by
    /// `iter` since the first offset is only used to determine the relative
    /// magnitude of the rest and corresponds to the last offset in `self`.
    fn extend<T: IntoIterator<Item = (usize, usize)>>(&mut self, iter: T) {
        let mut iter = iter.into_iter();
        // An empty iterator leaves `self` unchanged.
        if let Some((first_chunk_offset, first_offset)) = iter.next() {
            let last_offset = self.last_offset_value();
            let Self {
                chunk_offsets,
                offsets,
            } = self;
            // As a side effect of the `map` below, each remaining raw offset is pushed
            // onto `offsets`, rebased by `last_offset - first_offset` so the sequence
            // continues from the last offset already in `self`.
            // NOTE(review): this relies on `Offsets::extend` consuming its first element
            // to compute the shift for the remaining chunk offsets (so `first_chunk_offset`
            // itself is not appended) and on it draining the whole iterator, which drives
            // the `offsets.push` side effects — confirm against the `Offsets` impl.
            chunk_offsets.extend(std::iter::once(first_chunk_offset).chain(iter.map(
                |(chunk_offset, offset)| {
                    offsets.push(offset + last_offset - first_offset);
                    chunk_offset
                },
            )));
        }
    }
}
968
969#[cfg(test)]
970mod tests {
971    use super::*;
972    /// Test for the `split_offset_at` helper function.
973    #[test]
974    fn split_offset_at_test() {
975        let offsets = Offsets::new(vec![3, 6, 9, 12, 15, 19, 23, 27, 30, 33, 36, 39, 42]);
976        let clumped_offsets = ClumpedOffsets::from(offsets);
977
978        // Test in the middle
979        let (l, r, off) = clumped_offsets.view().split_offsets_with_intersection_at(4);
980        assert_eq!(Offsets::from(l).into_inner(), &[3, 6, 9, 12, 15]);
981        assert_eq!(
982            Offsets::from(r).into_inner(),
983            &[15, 19, 23, 27, 30, 33, 36, 39, 42]
984        );
985        assert_eq!(off, 12);
986
987        // Test at the beginning
988        let (l, r, off) = clumped_offsets.view().split_offsets_with_intersection_at(0);
989        assert_eq!(Offsets::from(l).into_inner(), &[3]);
990        assert_eq!(
991            Offsets::from(r).into_inner(),
992            &[3, 6, 9, 12, 15, 19, 23, 27, 30, 33, 36, 39, 42]
993        );
994        assert_eq!(off, 0);
995
996        // Test at the end
997        let (l, r, off) = clumped_offsets
998            .view()
999            .split_offsets_with_intersection_at(12);
1000        assert_eq!(
1001            Offsets::from(l).into_inner(),
1002            &[3, 6, 9, 12, 15, 19, 23, 27, 30, 33, 36, 39, 42]
1003        );
1004        assert_eq!(Offsets::from(r).into_inner(), &[42]);
1005        assert_eq!(off, 39);
1006    }
1007
1008    /// Check iterators over offsets and sizes.
1009    #[test]
1010    fn iterators() {
1011        let offsets = Offsets::new(vec![3, 6, 9, 12, 16, 20, 24, 27, 30, 33, 36, 39, 42]);
1012        let clumped_offsets = ClumpedOffsets::from(offsets.clone());
1013        assert_eq!(clumped_offsets.offset(6), 21);
1014
1015        for (orig, unclumped) in offsets.values().zip(clumped_offsets.values()) {
1016            assert_eq!(orig, unclumped);
1017        }
1018        for (orig, unclumped) in offsets.sizes().zip(clumped_offsets.sizes()) {
1019            assert_eq!(orig, unclumped);
1020        }
1021    }
1022
1023    /// Check index_range
1024    #[test]
1025    fn index_range() {
1026        let offsets = Offsets::new(vec![3, 6, 9, 12, 15, 19, 23, 27, 30, 33, 36, 39, 42]);
1027        let clumped_offsets = ClumpedOffsets::from(offsets.clone());
1028        assert_eq!(clumped_offsets.index_range(2..5), Some(6..16));
1029        assert_eq!(clumped_offsets.index_range(0..3), Some(0..9));
1030        assert_eq!(clumped_offsets.index_range(6..8), Some(20..27));
1031        assert_eq!(clumped_offsets.index_range(6..800), None);
1032    }
1033
1034    /// Check that clearing the clumped offsets keeps exactly one element.
1035    #[test]
1036    fn clear() {
1037        let mut offsets = ClumpedOffsets::from(Offsets::new(vec![0, 1, 2, 3, 4, 5]));
1038        offsets.clear();
1039        assert_eq!(offsets, ClumpedOffsets::new(vec![0], vec![0]));
1040    }
1041
1042    /// Check indexing offsets.
1043    #[test]
1044    fn get_offset() {
1045        let s = ClumpedOffsets::new(vec![3, 6, 7], vec![2, 11, 15]);
1046        assert_eq!(0, s.offset(0));
1047        assert_eq!(3, s.offset(1));
1048        assert_eq!(6, s.offset(2));
1049        assert_eq!(9, s.offset(3));
1050        assert_eq!(13, s.offset(4));
1051
1052        // get raw offset values
1053        assert_eq!(2, s.offset_value(0));
1054        assert_eq!(5, s.offset_value(1));
1055        assert_eq!(8, s.offset_value(2));
1056        assert_eq!(11, s.offset_value(3));
1057        assert_eq!(15, s.offset_value(4));
1058    }
1059
1060    #[test]
1061    fn offset_value_iter() {
1062        use ExactSizeIterator;
1063        let offsets = Offsets::new(vec![0, 3, 6, 9, 12, 16, 20, 24, 27, 30, 33, 36, 39]);
1064        let clumped_offsets = ClumpedOffsets::from(offsets.clone());
1065        let iter = clumped_offsets.values();
1066        let iter_len = iter.len();
1067        assert_eq!(iter_len, offsets.num_offsets());
1068        assert_eq!(iter_len, iter.count());
1069
1070        // The following checks that count is correctly implemented for the iterator.
1071        let mut count = 0;
1072        for _ in iter {
1073            count += 1;
1074        }
1075        assert_eq!(iter_len, count);
1076    }
1077
1078    #[test]
1079    fn sizes_iter() {
1080        use ExactSizeIterator;
1081        let offsets = Offsets::new(vec![3, 6, 9, 12, 15, 19, 23, 27, 30, 33, 36, 39, 42]);
1082        let clumped_offsets = ClumpedOffsets::from(offsets.clone());
1083        let mut iter = clumped_offsets.sizes();
1084        let iter_len = iter.len();
1085        assert_eq!(iter_len, offsets.num_offsets() - 1);
1086        assert_eq!(iter_len, iter.count());
1087
1088        for _ in 0..4 {
1089            assert_eq!(iter.next().unwrap(), 3);
1090        }
1091        for _ in 0..3 {
1092            assert_eq!(iter.next().unwrap(), 4);
1093        }
1094        for _ in 0..5 {
1095            assert_eq!(iter.next().unwrap(), 3);
1096        }
1097
1098        assert_eq!(clumped_offsets.sizes().nth(0).unwrap(), 3);
1099        assert_eq!(clumped_offsets.sizes().nth(1).unwrap(), 3);
1100        assert_eq!(clumped_offsets.sizes().nth(2).unwrap(), 3);
1101        assert_eq!(clumped_offsets.sizes().nth(3).unwrap(), 3);
1102        assert_eq!(clumped_offsets.sizes().nth(4).unwrap(), 4);
1103        assert_eq!(clumped_offsets.sizes().nth(5).unwrap(), 4);
1104        assert_eq!(clumped_offsets.sizes().nth(6).unwrap(), 4);
1105        assert_eq!(clumped_offsets.sizes().nth(7).unwrap(), 3);
1106        assert_eq!(clumped_offsets.sizes().nth(8).unwrap(), 3);
1107        assert_eq!(clumped_offsets.sizes().nth(9).unwrap(), 3);
1108        assert_eq!(clumped_offsets.sizes().nth(10).unwrap(), 3);
1109        assert_eq!(clumped_offsets.sizes().nth(11).unwrap(), 3);
1110
1111        // The following checks that count is correctly implemented for the sizes iterator.
1112        let mut count = 0;
1113        for _ in clumped_offsets.sizes() {
1114            count += 1;
1115        }
1116        assert_eq!(iter_len, count);
1117
1118        // Check that using next and nth works together as expected
1119
1120        let mut iter = clumped_offsets.sizes();
1121        assert_eq!(iter.next().unwrap(), 3); // Start with next
1122        assert_eq!(iter.nth(3).unwrap(), 4); // First in clump
1123        assert_eq!(iter.next().unwrap(), 4);
1124        assert_eq!(iter.nth(0).unwrap(), 4); // Last in clump
1125        assert_eq!(iter.next().unwrap(), 3);
1126        assert_eq!(iter.nth(3).unwrap(), 3); // Last in all
1127        assert_eq!(iter.next(), None);
1128        assert_eq!(iter.nth(0), None);
1129
1130        let mut iter = clumped_offsets.sizes();
1131        assert_eq!(iter.nth(3).unwrap(), 3); // Start with nth last in clump
1132        assert_eq!(iter.next().unwrap(), 4);
1133        assert_eq!(iter.nth(2).unwrap(), 3); // First in clump
1134        assert_eq!(iter.next().unwrap(), 3);
1135        assert_eq!(iter.nth(1).unwrap(), 3); // Middle of clump
1136        assert_eq!(iter.next().unwrap(), 3);
1137        assert_eq!(iter.next(), None);
1138        assert_eq!(iter.nth(0), None);
1139
1140        let mut iter = clumped_offsets.sizes();
1141        assert_eq!(iter.nth(5).unwrap(), 4); // Start with nth skipping first clump
1142    }
    /// Check `Extend` for `ClumpedOffsets`: empty and no-op extensions, then a real one.
    #[test]
    fn extend_clumped_offsets() {
        let offsets = Offsets::new(vec![3, 6, 9, 10, 11]);
        let mut clumped_offsets = ClumpedOffsets::from(offsets.clone());
        let orig_clumped_offsets = clumped_offsets.clone();
        clumped_offsets.extend(vec![]); // Safe and no panics.
        assert_eq!(clumped_offsets, orig_clumped_offsets);
        clumped_offsets.extend(vec![(0, 0)]); // Nothing new added.
        assert_eq!(clumped_offsets, orig_clumped_offsets);
        clumped_offsets.extend(vec![(1, 1), (3, 5)]); // Appends two offsets (13 and 15), shifted to continue from 11.
        let exp_extended_offsets = Offsets::new(vec![3, 6, 9, 10, 11, 13, 15]);
        assert_eq!(clumped_offsets, ClumpedOffsets::from(exp_extended_offsets));
    }
1156}