arrow2/
offset.rs

1//! Contains the declaration of [`Offset`]
2use std::hint::unreachable_unchecked;
3
4use crate::buffer::Buffer;
5use crate::error::Error;
6pub use crate::types::Offset;
7
8/// A wrapper type of [`Vec<O>`] representing the invariants of Arrow's offsets.
9/// It is guaranteed to (sound to assume that):
10/// * every element is `>= 0`
11/// * element at position `i` is >= than element at position `i-1`.
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct Offsets<O: Offset>(Vec<O>);
14
15impl<O: Offset> Default for Offsets<O> {
16    #[inline]
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl<O: Offset> TryFrom<Vec<O>> for Offsets<O> {
23    type Error = Error;
24
25    #[inline]
26    fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
27        try_check_offsets(&offsets)?;
28        Ok(Self(offsets))
29    }
30}
31
32impl<O: Offset> TryFrom<Buffer<O>> for OffsetsBuffer<O> {
33    type Error = Error;
34
35    #[inline]
36    fn try_from(offsets: Buffer<O>) -> Result<Self, Self::Error> {
37        try_check_offsets(&offsets)?;
38        Ok(Self(offsets))
39    }
40}
41
42impl<O: Offset> TryFrom<Vec<O>> for OffsetsBuffer<O> {
43    type Error = Error;
44
45    #[inline]
46    fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
47        try_check_offsets(&offsets)?;
48        Ok(Self(offsets.into()))
49    }
50}
51
52impl<O: Offset> From<Offsets<O>> for OffsetsBuffer<O> {
53    #[inline]
54    fn from(offsets: Offsets<O>) -> Self {
55        Self(offsets.0.into())
56    }
57}
58
59impl<O: Offset> Offsets<O> {
60    /// Returns an empty [`Offsets`] (i.e. with a single element, the zero)
61    #[inline]
62    pub fn new() -> Self {
63        Self(vec![O::zero()])
64    }
65
66    /// Returns an [`Offsets`] whose all lengths are zero.
67    #[inline]
68    pub fn new_zeroed(length: usize) -> Self {
69        Self(vec![O::zero(); length + 1])
70    }
71
72    /// Creates a new [`Offsets`] from an iterator of lengths
73    #[inline]
74    pub fn try_from_iter<I: IntoIterator<Item = usize>>(iter: I) -> Result<Self, Error> {
75        let iterator = iter.into_iter();
76        let (lower, _) = iterator.size_hint();
77        let mut offsets = Self::with_capacity(lower);
78        for item in iterator {
79            offsets.try_push_usize(item)?
80        }
81        Ok(offsets)
82    }
83
84    /// Returns a new [`Offsets`] with a capacity, allocating at least `capacity + 1` entries.
85    pub fn with_capacity(capacity: usize) -> Self {
86        let mut offsets = Vec::with_capacity(capacity + 1);
87        offsets.push(O::zero());
88        Self(offsets)
89    }
90
91    /// Returns the capacity of [`Offsets`].
92    pub fn capacity(&self) -> usize {
93        self.0.capacity() - 1
94    }
95
96    /// Reserves `additional` entries.
97    pub fn reserve(&mut self, additional: usize) {
98        self.0.reserve(additional);
99    }
100
101    /// Shrinks the capacity of self to fit.
102    pub fn shrink_to_fit(&mut self) {
103        self.0.shrink_to_fit();
104    }
105
106    /// Pushes a new element with a given length.
107    /// # Error
108    /// This function errors iff the new last item is larger than what `O` supports.
109    /// # Panic
110    /// This function asserts that `length > 0`.
111    #[inline]
112    pub fn try_push(&mut self, length: O) -> Result<(), Error> {
113        let old_length = self.last();
114        assert!(length >= O::zero());
115        let new_length = old_length.checked_add(&length).ok_or(Error::Overflow)?;
116        self.0.push(new_length);
117        Ok(())
118    }
119
120    /// Pushes a new element with a given length.
121    /// # Error
122    /// This function errors iff the new last item is larger than what `O` supports.
123    /// # Implementation
124    /// This function:
125    /// * checks that this length does not overflow
126    #[inline]
127    pub fn try_push_usize(&mut self, length: usize) -> Result<(), Error> {
128        let length = O::from_usize(length).ok_or(Error::Overflow)?;
129
130        let old_length = self.last();
131        let new_length = old_length.checked_add(&length).ok_or(Error::Overflow)?;
132        self.0.push(new_length);
133        Ok(())
134    }
135
136    /// Returns [`Offsets`] assuming that `offsets` fulfills its invariants
137    /// # Safety
138    /// This is safe iff the invariants of this struct are guaranteed in `offsets`.
139    #[inline]
140    pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
141        Self(offsets)
142    }
143
144    /// Returns the last offset of this container.
145    #[inline]
146    pub fn last(&self) -> &O {
147        match self.0.last() {
148            Some(element) => element,
149            None => unsafe { unreachable_unchecked() },
150        }
151    }
152
153    /// Returns a range (start, end) corresponding to the position `index`
154    /// # Panic
155    /// This function panics iff `index >= self.len()`
156    #[inline]
157    pub fn start_end(&self, index: usize) -> (usize, usize) {
158        // soundness: the invariant of the function
159        assert!(index < self.len_proxy());
160        unsafe { self.start_end_unchecked(index) }
161    }
162
163    /// Returns a range (start, end) corresponding to the position `index`
164    /// # Safety
165    /// `index` must be `< self.len()`
166    #[inline]
167    pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
168        // soundness: the invariant of the function
169        let start = self.0.get_unchecked(index).to_usize();
170        let end = self.0.get_unchecked(index + 1).to_usize();
171        (start, end)
172    }
173
174    /// Returns the length an array with these offsets would be.
175    #[inline]
176    pub fn len_proxy(&self) -> usize {
177        self.0.len() - 1
178    }
179
180    #[inline]
181    /// Returns the number of offsets in this container.
182    pub fn len(&self) -> usize {
183        self.0.len()
184    }
185
186    /// Returns the byte slice stored in this buffer
187    #[inline]
188    pub fn as_slice(&self) -> &[O] {
189        self.0.as_slice()
190    }
191
192    /// Pops the last element
193    #[inline]
194    pub fn pop(&mut self) -> Option<O> {
195        if self.len_proxy() == 0 {
196            None
197        } else {
198            self.0.pop()
199        }
200    }
201
202    /// Extends itself with `additional` elements equal to the last offset.
203    /// This is useful to extend offsets with empty values, e.g. for null slots.
204    #[inline]
205    pub fn extend_constant(&mut self, additional: usize) {
206        let offset = *self.last();
207        if additional == 1 {
208            self.0.push(offset)
209        } else {
210            self.0.resize(self.len() + additional, offset)
211        }
212    }
213
214    /// Try to create a new [`Offsets`] from a sequence of `lengths`
215    /// # Errors
216    /// This function errors iff this operation overflows for the maximum value of `O`.
217    #[inline]
218    pub fn try_from_lengths<I: Iterator<Item = usize>>(lengths: I) -> Result<Self, Error> {
219        let mut self_ = Self::with_capacity(lengths.size_hint().0);
220        self_.try_extend_from_lengths(lengths)?;
221        Ok(self_)
222    }
223
224    /// Try extend from an iterator of lengths
225    /// # Errors
226    /// This function errors iff this operation overflows for the maximum value of `O`.
227    #[inline]
228    pub fn try_extend_from_lengths<I: Iterator<Item = usize>>(
229        &mut self,
230        lengths: I,
231    ) -> Result<(), Error> {
232        let mut total_length = 0;
233        let mut offset = *self.last();
234        let original_offset = offset.to_usize();
235
236        let lengths = lengths.map(|length| {
237            total_length += length;
238            O::from_as_usize(length)
239        });
240
241        let offsets = lengths.map(|length| {
242            offset += length; // this may overflow, checked below
243            offset
244        });
245        self.0.extend(offsets);
246
247        let last_offset = original_offset
248            .checked_add(total_length)
249            .ok_or(Error::Overflow)?;
250        O::from_usize(last_offset).ok_or(Error::Overflow)?;
251        Ok(())
252    }
253
254    /// Extends itself from another [`Offsets`]
255    /// # Errors
256    /// This function errors iff this operation overflows for the maximum value of `O`.
257    pub fn try_extend_from_self(&mut self, other: &Self) -> Result<(), Error> {
258        let mut length = *self.last();
259        let other_length = *other.last();
260        // check if the operation would overflow
261        length.checked_add(&other_length).ok_or(Error::Overflow)?;
262
263        let lengths = other.as_slice().windows(2).map(|w| w[1] - w[0]);
264        let offsets = lengths.map(|new_length| {
265            length += new_length;
266            length
267        });
268        self.0.extend(offsets);
269        Ok(())
270    }
271
272    /// Extends itself from another [`Offsets`] sliced by `start, length`
273    /// # Errors
274    /// This function errors iff this operation overflows for the maximum value of `O`.
275    pub fn try_extend_from_slice(
276        &mut self,
277        other: &OffsetsBuffer<O>,
278        start: usize,
279        length: usize,
280    ) -> Result<(), Error> {
281        if length == 0 {
282            return Ok(());
283        }
284        let other = &other.0[start..start + length + 1];
285        let other_length = other.last().expect("Length to be non-zero");
286        let mut length = *self.last();
287        // check if the operation would overflow
288        length.checked_add(other_length).ok_or(Error::Overflow)?;
289
290        let lengths = other.windows(2).map(|w| w[1] - w[0]);
291        let offsets = lengths.map(|new_length| {
292            length += new_length;
293            length
294        });
295        self.0.extend(offsets);
296        Ok(())
297    }
298
299    /// Returns the inner [`Vec`].
300    #[inline]
301    pub fn into_inner(self) -> Vec<O> {
302        self.0
303    }
304}
305
306/// Checks that `offsets` is monotonically increasing.
307fn try_check_offsets<O: Offset>(offsets: &[O]) -> Result<(), Error> {
308    // this code is carefully constructed to auto-vectorize, don't change naively!
309    match offsets.first() {
310        None => Err(Error::oos("offsets must have at least one element")),
311        Some(first) => {
312            if *first < O::zero() {
313                return Err(Error::oos("offsets must be larger than 0"));
314            }
315            let mut previous = *first;
316            let mut any_invalid = false;
317
318            // This loop will auto-vectorize because there is not any break,
319            // an invalid value will be returned once the whole offsets buffer is processed.
320            for offset in offsets {
321                if previous > *offset {
322                    any_invalid = true
323                }
324                previous = *offset;
325            }
326
327            if any_invalid {
328                Err(Error::oos("offsets must be monotonically increasing"))
329            } else {
330                Ok(())
331            }
332        }
333    }
334}
335
336/// A wrapper type of [`Buffer<O>`] that is guaranteed to:
337/// * Always contain an element
338/// * Every element is `>= 0`
339/// * element at position `i` is >= than element at position `i-1`.
340#[derive(Clone, PartialEq, Debug)]
341pub struct OffsetsBuffer<O: Offset>(Buffer<O>);
342
343impl<O: Offset> Default for OffsetsBuffer<O> {
344    #[inline]
345    fn default() -> Self {
346        Self(vec![O::zero()].into())
347    }
348}
349
350impl<O: Offset> OffsetsBuffer<O> {
351    /// # Safety
352    /// This is safe iff the invariants of this struct are guaranteed in `offsets`.
353    #[inline]
354    pub unsafe fn new_unchecked(offsets: Buffer<O>) -> Self {
355        Self(offsets)
356    }
357
358    /// Returns an empty [`OffsetsBuffer`] (i.e. with a single element, the zero)
359    #[inline]
360    pub fn new() -> Self {
361        Self(vec![O::zero()].into())
362    }
363
364    /// Copy-on-write API to convert [`OffsetsBuffer`] into [`Offsets`].
365    #[inline]
366    pub fn into_mut(self) -> either::Either<Self, Offsets<O>> {
367        self.0
368            .into_mut()
369            // Safety: Offsets and OffsetsBuffer share invariants
370            .map_right(|offsets| unsafe { Offsets::new_unchecked(offsets) })
371            .map_left(Self)
372    }
373
374    /// Returns a reference to its internal [`Buffer`].
375    #[inline]
376    pub fn buffer(&self) -> &Buffer<O> {
377        &self.0
378    }
379
380    /// Returns the length an array with these offsets would be.
381    #[inline]
382    pub fn len_proxy(&self) -> usize {
383        self.0.len() - 1
384    }
385
386    /// Returns the number of offsets in this container.
387    #[inline]
388    pub fn len(&self) -> usize {
389        self.0.len()
390    }
391
392    /// Returns the byte slice stored in this buffer
393    #[inline]
394    pub fn as_slice(&self) -> &[O] {
395        self.0.as_slice()
396    }
397
398    /// Returns the range of the offsets.
399    #[inline]
400    pub fn range(&self) -> O {
401        *self.last() - *self.first()
402    }
403
404    /// Returns the first offset.
405    #[inline]
406    pub fn first(&self) -> &O {
407        match self.0.first() {
408            Some(element) => element,
409            None => unsafe { unreachable_unchecked() },
410        }
411    }
412
413    /// Returns the last offset.
414    #[inline]
415    pub fn last(&self) -> &O {
416        match self.0.last() {
417            Some(element) => element,
418            None => unsafe { unreachable_unchecked() },
419        }
420    }
421
422    /// Returns a range (start, end) corresponding to the position `index`
423    /// # Panic
424    /// This function panics iff `index >= self.len()`
425    #[inline]
426    pub fn start_end(&self, index: usize) -> (usize, usize) {
427        // soundness: the invariant of the function
428        assert!(index < self.len_proxy());
429        unsafe { self.start_end_unchecked(index) }
430    }
431
432    /// Returns a range (start, end) corresponding to the position `index`
433    /// # Safety
434    /// `index` must be `< self.len()`
435    #[inline]
436    pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
437        // soundness: the invariant of the function
438        let start = self.0.get_unchecked(index).to_usize();
439        let end = self.0.get_unchecked(index + 1).to_usize();
440        (start, end)
441    }
442
443    /// Slices this [`OffsetsBuffer`].
444    /// # Panics
445    /// Panics if `offset + length` is larger than `len`
446    /// or `length == 0`.
447    #[inline]
448    pub fn slice(&mut self, offset: usize, length: usize) {
449        assert!(length > 0);
450        self.0.slice(offset, length);
451    }
452
453    /// Slices this [`OffsetsBuffer`] starting at `offset`.
454    /// # Safety
455    /// The caller must ensure `offset + length <= self.len()`
456    #[inline]
457    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
458        self.0.slice_unchecked(offset, length);
459    }
460
461    /// Returns an iterator with the lengths of the offsets
462    #[inline]
463    pub fn lengths(&self) -> impl Iterator<Item = usize> + '_ {
464        self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
465    }
466
467    /// Returns the inner [`Buffer`].
468    #[inline]
469    pub fn into_inner(self) -> Buffer<O> {
470        self.0
471    }
472}
473
474impl From<&OffsetsBuffer<i32>> for OffsetsBuffer<i64> {
475    fn from(offsets: &OffsetsBuffer<i32>) -> Self {
476        // this conversion is lossless and uphelds all invariants
477        Self(
478            offsets
479                .buffer()
480                .iter()
481                .map(|x| *x as i64)
482                .collect::<Vec<_>>()
483                .into(),
484        )
485    }
486}
487
488impl TryFrom<&OffsetsBuffer<i64>> for OffsetsBuffer<i32> {
489    type Error = Error;
490
491    fn try_from(offsets: &OffsetsBuffer<i64>) -> Result<Self, Self::Error> {
492        i32::try_from(*offsets.last()).map_err(|_| Error::Overflow)?;
493
494        // this conversion is lossless and uphelds all invariants
495        Ok(Self(
496            offsets
497                .buffer()
498                .iter()
499                .map(|x| *x as i32)
500                .collect::<Vec<_>>()
501                .into(),
502        ))
503    }
504}
505
506impl From<Offsets<i32>> for Offsets<i64> {
507    fn from(offsets: Offsets<i32>) -> Self {
508        // this conversion is lossless and uphelds all invariants
509        Self(
510            offsets
511                .as_slice()
512                .iter()
513                .map(|x| *x as i64)
514                .collect::<Vec<_>>(),
515        )
516    }
517}
518
519impl TryFrom<Offsets<i64>> for Offsets<i32> {
520    type Error = Error;
521
522    fn try_from(offsets: Offsets<i64>) -> Result<Self, Self::Error> {
523        i32::try_from(*offsets.last()).map_err(|_| Error::Overflow)?;
524
525        // this conversion is lossless and uphelds all invariants
526        Ok(Self(
527            offsets
528                .as_slice()
529                .iter()
530                .map(|x| *x as i32)
531                .collect::<Vec<_>>(),
532        ))
533    }
534}
535
536impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
537    type Target = [O];
538
539    #[inline]
540    fn deref(&self) -> &[O] {
541        self.0.as_slice()
542    }
543}