Skip to main content

arrow_buffer/buffer/
offset.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22/// A non-empty buffer of monotonically increasing, positive integers.
23///
24/// [`OffsetBuffer`] are used to represent ranges of offsets. An
25/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
26/// offset for element `i` is `offsets[i]` and the end offset is
27/// `offsets[i+1]`. Equal offsets represent an empty range.
28///
29/// # Example
30///
31/// This example shows how 5 distinct ranges, are represented using a
32/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the
33/// three offsets `0, 1, 2`. The entry `(3,3)` represent no offsets
34/// (e.g. an empty list).
35///
36/// ```text
37///   ┌───────┐                ┌───┐
38///   │ (0,3) │                │ 0 │
39///   ├───────┤                ├───┤
40///   │ (3,3) │                │ 3 │
41///   ├───────┤                ├───┤
42///   │ (3,4) │                │ 3 │
43///   ├───────┤                ├───┤
44///   │ (4,5) │                │ 4 │
45///   ├───────┤                ├───┤
46///   │ (5,7) │                │ 5 │
47///   └───────┘                ├───┤
48///                            │ 7 │
49///                            └───┘
50///
51///                        Offsets Buffer
52///    Logical
53///    Offsets
54///
55///  (offsets[i],
56///   offsets[i+1])
57/// ```
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
63    ///
64    /// # Panics
65    ///
66    /// Panics if `buffer` is not a non-empty buffer containing
67    /// monotonically increasing values greater than or equal to zero
68    pub fn new(buffer: ScalarBuffer<O>) -> Self {
69        assert!(!buffer.is_empty(), "offsets cannot be empty");
70        assert!(
71            buffer[0] >= O::usize_as(0),
72            "offsets must be greater than 0"
73        );
74        assert!(
75            buffer.windows(2).all(|w| w[0] <= w[1]),
76            "offsets must be monotonically increasing"
77        );
78        Self(buffer)
79    }
80
81    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
82    ///
83    /// # Safety
84    ///
85    /// `buffer` must be a non-empty buffer containing monotonically increasing
86    /// values greater than or equal to zero
87    pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88        Self(buffer)
89    }
90
91    /// Create a new [`OffsetBuffer`] containing a single 0 value
92    pub fn new_empty() -> Self {
93        let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94        Self(buffer.into_buffer().into())
95    }
96
97    /// Create a new [`OffsetBuffer`] containing `len + 1` `0` values
98    pub fn new_zeroed(len: usize) -> Self {
99        let len_bytes = len
100            .checked_add(1)
101            .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102            .expect("overflow");
103        let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104        Self(buffer.into_buffer().into())
105    }
106
107    /// Create a new [`OffsetBuffer`] from the iterator of slice lengths
108    ///
109    /// ```
110    /// # use arrow_buffer::OffsetBuffer;
111    /// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
112    /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
113    /// ```
114    ///
115    /// If you want to create an [`OffsetBuffer`] where all lengths are the same,
116    /// consider using the faster [`OffsetBuffer::from_repeated_length`] instead.
117    ///
118    /// # Panics
119    ///
120    /// Panics on overflow
121    pub fn from_lengths<I>(lengths: I) -> Self
122    where
123        I: IntoIterator<Item = usize>,
124    {
125        let iter = lengths.into_iter();
126        let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
127        out.push(O::usize_as(0));
128
129        let mut acc = 0_usize;
130        for length in iter {
131            acc = acc.checked_add(length).expect("usize overflow");
132            out.push(O::usize_as(acc))
133        }
134        // Check for overflow
135        O::from_usize(acc).expect("offset overflow");
136        Self(out.into())
137    }
138
139    /// Create a new [`OffsetBuffer`] where each slice has the same length
140    /// `length`, repeated `n` times.
141    ///
142    ///
143    /// Example
144    /// ```
145    /// # use arrow_buffer::OffsetBuffer;
146    /// let offsets = OffsetBuffer::<i32>::from_repeated_length(4, 3);
147    /// assert_eq!(offsets.as_ref(), &[0, 4, 8, 12]);
148    /// ```
149    ///
150    /// # Panics
151    ///
152    /// Panics on overflow
153    pub fn from_repeated_length(length: usize, n: usize) -> Self {
154        if n == 0 {
155            return Self::new_empty();
156        }
157
158        if length == 0 {
159            return Self::new_zeroed(n);
160        }
161
162        // Check for overflow
163        // Making sure we don't overflow usize or O when calculating the total length
164        length.checked_mul(n).expect("usize overflow");
165
166        // Check for overflow
167        O::from_usize(length * n).expect("offset overflow");
168
169        let offsets = (0..=n)
170            .map(|index| O::usize_as(index * length))
171            .collect::<Vec<O>>();
172
173        Self(ScalarBuffer::from(offsets))
174    }
175
176    /// Get an Iterator over the lengths of this [`OffsetBuffer`]
177    ///
178    /// ```
179    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
180    /// let offsets = OffsetBuffer::<_>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
181    /// assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
182    /// ```
183    ///
184    /// Empty [`OffsetBuffer`] will return an empty iterator
185    /// ```
186    /// # use arrow_buffer::OffsetBuffer;
187    /// let offsets = OffsetBuffer::<i32>::new_empty();
188    /// assert_eq!(offsets.lengths().count(), 0);
189    /// ```
190    ///
191    /// This can be used to merge multiple [`OffsetBuffer`]s to one
192    /// ```
193    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
194    ///
195    /// let buffer1 = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
196    /// let buffer2 = OffsetBuffer::<i32>::from_lengths([1, 3, 5, 7, 9]);
197    ///
198    /// let merged = OffsetBuffer::<i32>::from_lengths(
199    ///     vec![buffer1, buffer2].iter().flat_map(|x| x.lengths())
200    /// );
201    ///
202    /// assert_eq!(merged.lengths().collect::<Vec<_>>(), &[2, 6, 3, 7, 2, 1, 3, 5, 7, 9]);
203    /// ```
204    pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
205        self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize())
206    }
207
208    /// Free up unused memory.
209    pub fn shrink_to_fit(&mut self) {
210        self.0.shrink_to_fit();
211    }
212
213    /// Returns the inner [`ScalarBuffer`]
214    pub fn inner(&self) -> &ScalarBuffer<O> {
215        &self.0
216    }
217
218    /// Returns the inner [`ScalarBuffer`], consuming self
219    pub fn into_inner(self) -> ScalarBuffer<O> {
220        self.0
221    }
222
223    /// Claim memory used by this buffer in the provided memory pool.
224    #[cfg(feature = "pool")]
225    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
226        self.0.claim(pool);
227    }
228
229    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
230    pub fn slice(&self, offset: usize, len: usize) -> Self {
231        Self(self.0.slice(offset, len.saturating_add(1)))
232    }
233
234    /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons
235    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
236    /// return false when the arrays are logically equal
237    #[inline]
238    pub fn ptr_eq(&self, other: &Self) -> bool {
239        self.0.ptr_eq(&other.0)
240    }
241
242    /// Check if any null positions in the `null_buffer` correspond to
243    /// non-empty ranges in this [`OffsetBuffer`].
244    ///
245    /// In variable-length array types (e.g., `StringArray`, `ListArray`),
246    /// null entries may or may not have empty offset ranges. This method
247    /// detects cases where a null entry has a non-empty range
248    /// (i.e., `offsets[i] != offsets[i+1]`), which means the underlying
249    /// data buffer contains data behind nulls.
250    ///
251    /// This matters because unwrapping (flattening) a list array exposes
252    /// the child values, including those behind null entries. If null
253    /// entries point to non-empty ranges, the unwrapped values will
254    /// contain data that may not be meaningful to operate on and could
255    /// cause errors (e.g., division by zero in the child values).
256    ///
257    /// Returns `false` if `null_buffer` is `None` or contains no nulls.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer, NullBuffer};
263    /// // Offsets where null at index 1 has an empty range (3..3)
264    /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6]));
265    /// let nulls = NullBuffer::from(vec![true, false, true]);
266    /// assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
267    ///
268    /// // Offsets where null at index 1 has a non-empty range (3..7)
269    /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10]));
270    /// let nulls = NullBuffer::from(vec![true, false, true]);
271    /// assert!(offsets.has_non_empty_nulls(Some(&nulls)));
272    /// ```
273    ///
274    /// # Panics
275    ///
276    /// Panics if the length of the `null_buffer` does not equal `self.len() - 1`.
277    pub fn has_non_empty_nulls(&self, null_buffer: Option<&NullBuffer>) -> bool {
278        let Some(null_buffer) = null_buffer else {
279            return false;
280        };
281
282        assert_eq!(
283            self.len() - 1,
284            null_buffer.len(),
285            "The length of the offsets should be 1 more than the length of the null buffer"
286        );
287
288        if null_buffer.null_count() == 0 {
289            return false;
290        }
291
292        // Offsets always have at least 1 value
293        let initial_offset = self[0];
294        let last_offset = self[self.len() - 1];
295
296        // If all the values are null (offsets have 1 more value than the length of the array)
297        if null_buffer.null_count() == self.len() - 1 {
298            return last_offset != initial_offset;
299        }
300
301        let mut valid_slices_iter = null_buffer.valid_slices();
302
303        // This is safe as we validated that are at least 1 valid value in the array
304        let (start, end) = valid_slices_iter.next().unwrap();
305
306        // If the nulls before have length greater than 0
307        if self[start] != initial_offset {
308            return true;
309        }
310
311        // End is exclusive, so it already point to the last offset value
312        // This is valid as the length of the array is always 1 less than the length of the offsets
313        let mut end_offset_of_last_valid_value = self[end];
314
315        for (start, end) in valid_slices_iter {
316            // If there is a null value that point to a non-empty value than the start offset of the valid value
317            // will be different that the end offset of the last valid value
318            if self[start] != end_offset_of_last_valid_value {
319                return true;
320            }
321
322            // End is exclusive, so it already point to the last offset value
323            // This is valid as the length of the array is always 1 less than the length of the offsets
324            end_offset_of_last_valid_value = self[end];
325        }
326
327        end_offset_of_last_valid_value != last_offset
328    }
329}
330
331impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
332    type Target = [T];
333
334    #[inline]
335    fn deref(&self) -> &Self::Target {
336        &self.0
337    }
338}
339
340impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
341    #[inline]
342    fn as_ref(&self) -> &[T] {
343        self
344    }
345}
346
347impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
348    fn from(value: OffsetBufferBuilder<O>) -> Self {
349        value.finish()
350    }
351}
352
353impl<O: ArrowNativeType> Default for OffsetBuffer<O> {
354    fn default() -> Self {
355        Self::new_empty()
356    }
357}
358
359#[cfg(test)]
360mod tests {
361    use super::*;
362
363    #[test]
364    #[should_panic(expected = "offsets cannot be empty")]
365    fn empty_offsets() {
366        OffsetBuffer::new(Vec::<i32>::new().into());
367    }
368
369    #[test]
370    #[should_panic(expected = "offsets must be greater than 0")]
371    fn negative_offsets() {
372        OffsetBuffer::new(vec![-1, 0, 1].into());
373    }
374
375    #[test]
376    fn offsets() {
377        OffsetBuffer::new(vec![0, 1, 2, 3].into());
378
379        let offsets = OffsetBuffer::<i32>::new_zeroed(3);
380        assert_eq!(offsets.as_ref(), &[0; 4]);
381
382        let offsets = OffsetBuffer::<i32>::new_zeroed(0);
383        assert_eq!(offsets.as_ref(), &[0; 1]);
384    }
385
386    #[test]
387    #[should_panic(expected = "overflow")]
388    fn offsets_new_zeroed_overflow() {
389        OffsetBuffer::<i32>::new_zeroed(usize::MAX);
390    }
391
392    #[test]
393    #[should_panic(expected = "offsets must be monotonically increasing")]
394    fn non_monotonic_offsets() {
395        OffsetBuffer::new(vec![1, 2, 0].into());
396    }
397
398    #[test]
399    fn from_lengths() {
400        let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
401        assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
402
403        let half_max = i32::MAX / 2;
404        let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
405        assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
406    }
407
408    #[test]
409    #[should_panic(expected = "offset overflow")]
410    fn from_lengths_offset_overflow() {
411        OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
412    }
413
414    #[test]
415    #[should_panic(expected = "usize overflow")]
416    fn from_lengths_usize_overflow() {
417        OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
418    }
419
420    #[test]
421    #[should_panic(expected = "offset overflow")]
422    fn from_repeated_lengths_offset_length_overflow() {
423        OffsetBuffer::<i32>::from_repeated_length(i32::MAX as usize / 4, 5);
424    }
425
426    #[test]
427    #[should_panic(expected = "offset overflow")]
428    fn from_repeated_lengths_offset_repeat_overflow() {
429        OffsetBuffer::<i32>::from_repeated_length(1, i32::MAX as usize + 1);
430    }
431
432    #[test]
433    #[should_panic(expected = "offset overflow")]
434    fn from_repeated_lengths_usize_length_overflow() {
435        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 1);
436    }
437
438    #[test]
439    #[should_panic(expected = "usize overflow")]
440    fn from_repeated_lengths_usize_length_usize_overflow() {
441        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 2);
442    }
443
444    #[test]
445    #[should_panic(expected = "offset overflow")]
446    fn from_repeated_lengths_usize_repeat_overflow() {
447        OffsetBuffer::<i32>::from_repeated_length(1, usize::MAX);
448    }
449
450    #[test]
451    fn get_lengths() {
452        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
453        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
454    }
455
456    #[test]
457    fn get_lengths_should_be_with_fixed_size() {
458        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
459        let iter = offsets.lengths();
460        assert_eq!(iter.size_hint(), (3, Some(3)));
461        assert_eq!(iter.len(), 3);
462    }
463
464    #[test]
465    fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
466        let offsets = OffsetBuffer::<i32>::new_empty();
467        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![]);
468    }
469
470    #[test]
471    fn impl_eq() {
472        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
473            a.eq(b)
474        }
475
476        assert!(
477            are_equal(
478                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9])),
479                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]))
480            ),
481            "OffsetBuffer should implement Eq."
482        );
483    }
484
485    #[test]
486    fn impl_default() {
487        let default = OffsetBuffer::<i32>::default();
488        assert_eq!(default.as_ref(), &[0]);
489    }
490
491    #[test]
492    fn from_repeated_length_basic() {
493        // Basic case with length 4, repeated 3 times
494        let buffer = OffsetBuffer::<i32>::from_repeated_length(4, 3);
495        assert_eq!(buffer.as_ref(), &[0, 4, 8, 12]);
496
497        // Verify the lengths are correct
498        let lengths: Vec<usize> = buffer.lengths().collect();
499        assert_eq!(lengths, vec![4, 4, 4]);
500    }
501
502    #[test]
503    fn from_repeated_length_single_repeat() {
504        // Length 5, repeated once
505        let buffer = OffsetBuffer::<i32>::from_repeated_length(5, 1);
506        assert_eq!(buffer.as_ref(), &[0, 5]);
507
508        let lengths: Vec<usize> = buffer.lengths().collect();
509        assert_eq!(lengths, vec![5]);
510    }
511
512    #[test]
513    fn from_repeated_length_zero_repeats() {
514        let buffer = OffsetBuffer::<i32>::from_repeated_length(10, 0);
515        assert_eq!(buffer, OffsetBuffer::<i32>::new_empty());
516    }
517
518    #[test]
519    fn from_repeated_length_zero_length() {
520        // Zero length, repeated 5 times (all zeros)
521        let buffer = OffsetBuffer::<i32>::from_repeated_length(0, 5);
522        assert_eq!(buffer.as_ref(), &[0, 0, 0, 0, 0, 0]);
523
524        // All lengths should be 0
525        let lengths: Vec<usize> = buffer.lengths().collect();
526        assert_eq!(lengths, vec![0, 0, 0, 0, 0]);
527    }
528
529    #[test]
530    fn from_repeated_length_large_values() {
531        // Test with larger values that don't overflow
532        let buffer = OffsetBuffer::<i32>::from_repeated_length(1000, 100);
533        assert_eq!(buffer[0], 0);
534
535        // Verify all lengths are 1000
536        let lengths: Vec<usize> = buffer.lengths().collect();
537        assert_eq!(lengths.len(), 100);
538        assert!(lengths.iter().all(|&len| len == 1000));
539    }
540
541    #[test]
542    fn from_repeated_length_unit_length() {
543        // Length 1, repeated multiple times
544        let buffer = OffsetBuffer::<i32>::from_repeated_length(1, 10);
545        assert_eq!(buffer.as_ref(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
546
547        let lengths: Vec<usize> = buffer.lengths().collect();
548        assert_eq!(lengths, vec![1; 10]);
549    }
550
551    #[test]
552    fn from_repeated_length_max_safe_values() {
553        // Test with maximum safe values for i32
554        // i32::MAX / 3 ensures we don't overflow when repeated twice
555        let third_max = (i32::MAX / 3) as usize;
556        let buffer = OffsetBuffer::<i32>::from_repeated_length(third_max, 2);
557        assert_eq!(
558            buffer.as_ref(),
559            &[0, third_max as i32, (third_max * 2) as i32]
560        );
561    }
562
563    // ---------------------------------------------------------------
564    // Tests for has_non_empty_nulls
565    // ---------------------------------------------------------------
566
567    #[test]
568    fn has_non_empty_nulls_none_null_buffer() {
569        // No null buffer at all -> false
570        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
571        assert!(!offsets.has_non_empty_nulls(None));
572    }
573
574    #[test]
575    fn has_non_empty_nulls_all_valid() {
576        // Null buffer with zero nulls -> false (early return via filter)
577        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
578        let nulls = NullBuffer::new_valid(3);
579        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
580    }
581
582    #[test]
583    fn has_non_empty_nulls_all_null_empty_offsets() {
584        // All values are null and all offsets are equal (no data behind nulls) -> false
585        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 0, 0]));
586        let nulls = NullBuffer::new_null(3);
587        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
588    }
589
590    #[test]
591    fn has_non_empty_nulls_all_null_non_empty_offsets() {
592        // All values are null but offsets span data -> true
593        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 5, 7]));
594        let nulls = NullBuffer::new_null(3);
595        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
596    }
597
598    #[test]
599    fn has_non_empty_nulls_all_null_nonzero_but_equal_offsets() {
600        // All null, offsets start at non-zero but are all equal -> false
601        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![5, 5, 5]));
602        let nulls = NullBuffer::new_null(2);
603        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
604    }
605
606    #[test]
607    fn has_non_empty_nulls_leading_nulls_with_data() {
608        // Nulls at the beginning that point to non-empty ranges -> true
609        // offsets: [0, 3, 5, 8]  nulls: [false, true, true]
610        // Index 0 is null with range 0..3 (non-empty)
611        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
612        let nulls = NullBuffer::from(vec![false, true, true]);
613        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
614    }
615
616    #[test]
617    fn has_non_empty_nulls_leading_nulls_without_data() {
618        // Nulls at the beginning with empty ranges -> continue checking
619        // offsets: [0, 0, 3, 6]  nulls: [false, true, true]
620        // Index 0 is null with range 0..0 (empty)
621        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6]));
622        let nulls = NullBuffer::from(vec![false, true, true]);
623        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
624    }
625
626    #[test]
627    fn has_non_empty_nulls_only_trailing_null_has_data() {
628        // Only the trailing null region has data, everything else is clean
629        // offsets: [0, 0, 3, 6, 8]  nulls: [false, true, true, false]
630        // Null at 0 (0..0 empty), valid at 1,2 (0..3, 3..6), null at 3 (6..8 non-empty)
631        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6, 8]));
632        let nulls = NullBuffer::from(vec![false, true, true, false]);
633        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
634    }
635
636    #[test]
637    fn has_non_empty_nulls_trailing_nulls_without_data() {
638        // Nulls at the end with empty ranges -> false
639        // offsets: [0, 3, 6, 6]  nulls: [true, true, false]
640        // Index 2 is null with range 6..6 (empty)
641        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6, 6]));
642        let nulls = NullBuffer::from(vec![true, true, false]);
643        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
644    }
645
646    #[test]
647    fn has_non_empty_nulls_middle_nulls_with_data() {
648        // Null in the middle with non-empty range -> true
649        // offsets: [0, 3, 7, 10]  nulls: [true, false, true]
650        // Index 1 is null with range 3..7 (non-empty)
651        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10]));
652        let nulls = NullBuffer::from(vec![true, false, true]);
653        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
654    }
655
656    #[test]
657    fn has_non_empty_nulls_middle_nulls_without_data() {
658        // Null in the middle with empty range -> false
659        // offsets: [0, 3, 3, 6]  nulls: [true, false, true]
660        // Index 1 is null with range 3..3 (empty)
661        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6]));
662        let nulls = NullBuffer::from(vec![true, false, true]);
663        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
664    }
665
666    #[test]
667    fn has_non_empty_nulls_alternating_null_valid_all_empty() {
668        // Alternating null/valid where every null has an empty range -> false.
669
670        // Ends with null
671        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6]));
672        let nulls = NullBuffer::from(vec![false, true, false, true, false]);
673        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
674
675        // Ends with valid
676        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6, 9]));
677        let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
678        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
679    }
680
681    #[test]
682    fn has_non_empty_nulls_multiple_null_regions_second_has_data() {
683        // Two null regions: first empty, second non-empty -> true
684        // offsets: [0, 0, 3, 5, 6]  nulls: [false, true, false, true]
685        // Null at index 0 (0..0 empty), null at index 2 (3..5 non-empty)
686        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 5, 6]));
687        let nulls = NullBuffer::from(vec![false, true, false, true]);
688        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
689    }
690
691    #[test]
692    fn has_non_empty_nulls_multiple_null_regions_later_gap_has_data() {
693        // Three null regions: first two empty, third non-empty -> true
694        // offsets: [0, 0, 3, 3, 6, 8, 10]  nulls: [false, true, false, true, false, true]
695        // valid_slices: (1,2), (3,4), (5,6)
696        // first slice: start=1, self[1]=0 == initial_offset=0 OK, end_offset=self[2]=3
697        // loop iter 1: start=3, self[3]=3 == 3 OK (first gap empty), end_offset=self[4]=6
698        // loop iter 2: start=5, self[5]=8 != 6 -> true (second gap has data)
699        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 8, 10]));
700        let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
701        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
702    }
703
704    #[test]
705    fn has_non_empty_nulls_single_element_null_empty() {
706        // Single element, null with empty range -> false
707        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0]));
708        let nulls = NullBuffer::new_null(1);
709        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
710    }
711
712    #[test]
713    fn has_non_empty_nulls_single_element_null_non_empty() {
714        // Single element, null with non-empty range -> true
715        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
716        let nulls = NullBuffer::new_null(1);
717        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
718    }
719
720    #[test]
721    fn has_non_empty_nulls_single_element_valid() {
722        // Single element, valid -> false (no nulls at all)
723        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
724        let nulls = NullBuffer::new_valid(1);
725        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
726    }
727
728    #[test]
729    fn has_non_empty_nulls_consecutive_nulls_between_valid_slices() {
730        // Multiple consecutive nulls between valid regions
731        // offsets: [0, 2, 2, 2, 5, 8]  nulls: [true, false, false, true, true]
732        // Valid: [0], nulls: [1,2], valid: [3,4]
733        // Null region [1,2] has offsets 2..2..2 (empty) -> false
734        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 2, 2, 5, 8]));
735        let nulls = NullBuffer::from(vec![true, false, false, true, true]);
736        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
737    }
738
739    #[test]
740    fn has_non_empty_nulls_consecutive_nulls_between_valid_slices_with_data() {
741        // Multiple consecutive nulls between valid regions, nulls have data
742        // offsets: [0, 2, 3, 4, 5, 8]  nulls: [true, false, false, true, true]
743        // valid_slices: (0,1), (3,5)
744        // first slice: start=0, end=1 -> self[0]=0 == initial_offset=0 OK
745        //   end_offset_of_last_valid_value = self[1] = 2
746        // second slice: start=3, end=5 -> self[3]=4 != 2 -> true
747        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 4, 5, 8]));
748        let nulls = NullBuffer::from(vec![true, false, false, true, true]);
749        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
750    }
751
752    #[test]
753    fn has_non_empty_nulls_nonzero_initial_offset_all_null_equal() {
754        // Non-zero starting offset, all null, all offsets equal -> false
755        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 10, 10]));
756        let nulls = NullBuffer::new_null(2);
757        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
758    }
759
760    #[test]
761    fn has_non_empty_nulls_nonzero_initial_offset_with_data() {
762        // Non-zero starting offset, null has data
763        // offsets: [10, 15, 20]  nulls: [false, true]
764        // Null at index 0 with range 10..15 (non-empty) -> true
765        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 15, 20]));
766        let nulls = NullBuffer::from(vec![false, true]);
767        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
768    }
769
770    #[test]
771    fn has_non_empty_nulls_sliced_no_nulls_in_null_region() {
772        // Original: [0, 3, 3, 6, 6, 9]  -> slice(1, 3) -> [3, 3, 6, 6]
773        // initial_offset=3, last_offset=6
774        // nulls: [false, true, false]  (null at index 0 has range 3..3 = empty)
775        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6, 6, 9]));
776        let sliced = offsets.slice(1, 3);
777        let nulls = NullBuffer::from(vec![false, true, false]);
778        assert!(!sliced.has_non_empty_nulls(Some(&nulls)));
779    }
780
781    #[test]
782    fn has_non_empty_nulls_sliced_null_has_data() {
783        // Original: [0, 3, 7, 10, 15]  -> slice(1, 2) -> [3, 7, 10]
784        // initial_offset=3, last_offset=10
785        // nulls: [false, true]  (null at index 0 has range 3..7 = non-empty)
786        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10, 15]));
787        let sliced = offsets.slice(1, 2);
788        let nulls = NullBuffer::from(vec![false, true]);
789        assert!(sliced.has_non_empty_nulls(Some(&nulls)));
790    }
791
792    #[test]
793    #[should_panic(
794        expected = "The length of the offsets should be 1 more than the length of the null buffer"
795    )]
796    fn has_non_empty_nulls_all_valid_mismatched_lengths_too_short() {
797        // All-valid null buffer with wrong length should still panic
798        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
799        let nulls = NullBuffer::new_valid(2); // expects 3
800        offsets.has_non_empty_nulls(Some(&nulls));
801    }
802
803    #[test]
804    #[should_panic(
805        expected = "The length of the offsets should be 1 more than the length of the null buffer"
806    )]
807    fn has_non_empty_nulls_all_valid_mismatched_lengths_too_long() {
808        // All-valid null buffer with wrong length should still panic
809        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
810        let nulls = NullBuffer::new_valid(5); // expects 3
811        offsets.has_non_empty_nulls(Some(&nulls));
812    }
813}