1use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62 pub fn new(buffer: ScalarBuffer<O>) -> Self {
69 assert!(!buffer.is_empty(), "offsets cannot be empty");
70 assert!(
71 buffer[0] >= O::usize_as(0),
72 "offsets must be greater than 0"
73 );
74 assert!(
75 buffer.windows(2).all(|w| w[0] <= w[1]),
76 "offsets must be monotonically increasing"
77 );
78 Self(buffer)
79 }
80
81 pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88 Self(buffer)
89 }
90
91 pub fn new_empty() -> Self {
93 let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94 Self(buffer.into_buffer().into())
95 }
96
97 pub fn new_zeroed(len: usize) -> Self {
99 let len_bytes = len
100 .checked_add(1)
101 .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102 .expect("overflow");
103 let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104 Self(buffer.into_buffer().into())
105 }
106
107 pub fn from_lengths<I>(lengths: I) -> Self
122 where
123 I: IntoIterator<Item = usize>,
124 {
125 let iter = lengths.into_iter();
126 let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
127 out.push(O::usize_as(0));
128
129 let mut acc = 0_usize;
130 for length in iter {
131 acc = acc.checked_add(length).expect("usize overflow");
132 out.push(O::usize_as(acc))
133 }
134 O::from_usize(acc).expect("offset overflow");
136 Self(out.into())
137 }
138
139 pub fn from_repeated_length(length: usize, n: usize) -> Self {
154 if n == 0 {
155 return Self::new_empty();
156 }
157
158 if length == 0 {
159 return Self::new_zeroed(n);
160 }
161
162 length.checked_mul(n).expect("usize overflow");
165
166 O::from_usize(length * n).expect("offset overflow");
168
169 let offsets = (0..=n)
170 .map(|index| O::usize_as(index * length))
171 .collect::<Vec<O>>();
172
173 Self(ScalarBuffer::from(offsets))
174 }
175
176 pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
205 self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize())
206 }
207
208 pub fn shrink_to_fit(&mut self) {
210 self.0.shrink_to_fit();
211 }
212
213 pub fn inner(&self) -> &ScalarBuffer<O> {
215 &self.0
216 }
217
218 pub fn into_inner(self) -> ScalarBuffer<O> {
220 self.0
221 }
222
223 #[cfg(feature = "pool")]
225 pub fn claim(&self, pool: &dyn crate::MemoryPool) {
226 self.0.claim(pool);
227 }
228
229 pub fn slice(&self, offset: usize, len: usize) -> Self {
231 Self(self.0.slice(offset, len.saturating_add(1)))
232 }
233
234 #[inline]
238 pub fn ptr_eq(&self, other: &Self) -> bool {
239 self.0.ptr_eq(&other.0)
240 }
241
242 pub fn has_non_empty_nulls(&self, null_buffer: Option<&NullBuffer>) -> bool {
278 let Some(null_buffer) = null_buffer else {
279 return false;
280 };
281
282 assert_eq!(
283 self.len() - 1,
284 null_buffer.len(),
285 "The length of the offsets should be 1 more than the length of the null buffer"
286 );
287
288 if null_buffer.null_count() == 0 {
289 return false;
290 }
291
292 let initial_offset = self[0];
294 let last_offset = self[self.len() - 1];
295
296 if null_buffer.null_count() == self.len() - 1 {
298 return last_offset != initial_offset;
299 }
300
301 let mut valid_slices_iter = null_buffer.valid_slices();
302
303 let (start, end) = valid_slices_iter.next().unwrap();
305
306 if self[start] != initial_offset {
308 return true;
309 }
310
311 let mut end_offset_of_last_valid_value = self[end];
314
315 for (start, end) in valid_slices_iter {
316 if self[start] != end_offset_of_last_valid_value {
319 return true;
320 }
321
322 end_offset_of_last_valid_value = self[end];
325 }
326
327 end_offset_of_last_valid_value != last_offset
328 }
329}
330
331impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
332 type Target = [T];
333
334 #[inline]
335 fn deref(&self) -> &Self::Target {
336 &self.0
337 }
338}
339
340impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
341 #[inline]
342 fn as_ref(&self) -> &[T] {
343 self
344 }
345}
346
347impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
348 fn from(value: OffsetBufferBuilder<O>) -> Self {
349 value.finish()
350 }
351}
352
353impl<O: ArrowNativeType> Default for OffsetBuffer<O> {
354 fn default() -> Self {
355 Self::new_empty()
356 }
357}
358
359#[cfg(test)]
360mod tests {
361 use super::*;
362
363 #[test]
364 #[should_panic(expected = "offsets cannot be empty")]
365 fn empty_offsets() {
366 OffsetBuffer::new(Vec::<i32>::new().into());
367 }
368
369 #[test]
370 #[should_panic(expected = "offsets must be greater than 0")]
371 fn negative_offsets() {
372 OffsetBuffer::new(vec![-1, 0, 1].into());
373 }
374
375 #[test]
376 fn offsets() {
377 OffsetBuffer::new(vec![0, 1, 2, 3].into());
378
379 let offsets = OffsetBuffer::<i32>::new_zeroed(3);
380 assert_eq!(offsets.as_ref(), &[0; 4]);
381
382 let offsets = OffsetBuffer::<i32>::new_zeroed(0);
383 assert_eq!(offsets.as_ref(), &[0; 1]);
384 }
385
386 #[test]
387 #[should_panic(expected = "overflow")]
388 fn offsets_new_zeroed_overflow() {
389 OffsetBuffer::<i32>::new_zeroed(usize::MAX);
390 }
391
392 #[test]
393 #[should_panic(expected = "offsets must be monotonically increasing")]
394 fn non_monotonic_offsets() {
395 OffsetBuffer::new(vec![1, 2, 0].into());
396 }
397
398 #[test]
399 fn from_lengths() {
400 let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
401 assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
402
403 let half_max = i32::MAX / 2;
404 let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
405 assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
406 }
407
408 #[test]
409 #[should_panic(expected = "offset overflow")]
410 fn from_lengths_offset_overflow() {
411 OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
412 }
413
414 #[test]
415 #[should_panic(expected = "usize overflow")]
416 fn from_lengths_usize_overflow() {
417 OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
418 }
419
420 #[test]
421 #[should_panic(expected = "offset overflow")]
422 fn from_repeated_lengths_offset_length_overflow() {
423 OffsetBuffer::<i32>::from_repeated_length(i32::MAX as usize / 4, 5);
424 }
425
426 #[test]
427 #[should_panic(expected = "offset overflow")]
428 fn from_repeated_lengths_offset_repeat_overflow() {
429 OffsetBuffer::<i32>::from_repeated_length(1, i32::MAX as usize + 1);
430 }
431
432 #[test]
433 #[should_panic(expected = "offset overflow")]
434 fn from_repeated_lengths_usize_length_overflow() {
435 OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 1);
436 }
437
438 #[test]
439 #[should_panic(expected = "usize overflow")]
440 fn from_repeated_lengths_usize_length_usize_overflow() {
441 OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 2);
442 }
443
444 #[test]
445 #[should_panic(expected = "offset overflow")]
446 fn from_repeated_lengths_usize_repeat_overflow() {
447 OffsetBuffer::<i32>::from_repeated_length(1, usize::MAX);
448 }
449
450 #[test]
451 fn get_lengths() {
452 let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
453 assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
454 }
455
456 #[test]
457 fn get_lengths_should_be_with_fixed_size() {
458 let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
459 let iter = offsets.lengths();
460 assert_eq!(iter.size_hint(), (3, Some(3)));
461 assert_eq!(iter.len(), 3);
462 }
463
464 #[test]
465 fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
466 let offsets = OffsetBuffer::<i32>::new_empty();
467 assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![]);
468 }
469
470 #[test]
471 fn impl_eq() {
472 fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
473 a.eq(b)
474 }
475
476 assert!(
477 are_equal(
478 &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9])),
479 &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]))
480 ),
481 "OffsetBuffer should implement Eq."
482 );
483 }
484
485 #[test]
486 fn impl_default() {
487 let default = OffsetBuffer::<i32>::default();
488 assert_eq!(default.as_ref(), &[0]);
489 }
490
491 #[test]
492 fn from_repeated_length_basic() {
493 let buffer = OffsetBuffer::<i32>::from_repeated_length(4, 3);
495 assert_eq!(buffer.as_ref(), &[0, 4, 8, 12]);
496
497 let lengths: Vec<usize> = buffer.lengths().collect();
499 assert_eq!(lengths, vec![4, 4, 4]);
500 }
501
502 #[test]
503 fn from_repeated_length_single_repeat() {
504 let buffer = OffsetBuffer::<i32>::from_repeated_length(5, 1);
506 assert_eq!(buffer.as_ref(), &[0, 5]);
507
508 let lengths: Vec<usize> = buffer.lengths().collect();
509 assert_eq!(lengths, vec![5]);
510 }
511
512 #[test]
513 fn from_repeated_length_zero_repeats() {
514 let buffer = OffsetBuffer::<i32>::from_repeated_length(10, 0);
515 assert_eq!(buffer, OffsetBuffer::<i32>::new_empty());
516 }
517
518 #[test]
519 fn from_repeated_length_zero_length() {
520 let buffer = OffsetBuffer::<i32>::from_repeated_length(0, 5);
522 assert_eq!(buffer.as_ref(), &[0, 0, 0, 0, 0, 0]);
523
524 let lengths: Vec<usize> = buffer.lengths().collect();
526 assert_eq!(lengths, vec![0, 0, 0, 0, 0]);
527 }
528
529 #[test]
530 fn from_repeated_length_large_values() {
531 let buffer = OffsetBuffer::<i32>::from_repeated_length(1000, 100);
533 assert_eq!(buffer[0], 0);
534
535 let lengths: Vec<usize> = buffer.lengths().collect();
537 assert_eq!(lengths.len(), 100);
538 assert!(lengths.iter().all(|&len| len == 1000));
539 }
540
541 #[test]
542 fn from_repeated_length_unit_length() {
543 let buffer = OffsetBuffer::<i32>::from_repeated_length(1, 10);
545 assert_eq!(buffer.as_ref(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
546
547 let lengths: Vec<usize> = buffer.lengths().collect();
548 assert_eq!(lengths, vec![1; 10]);
549 }
550
551 #[test]
552 fn from_repeated_length_max_safe_values() {
553 let third_max = (i32::MAX / 3) as usize;
556 let buffer = OffsetBuffer::<i32>::from_repeated_length(third_max, 2);
557 assert_eq!(
558 buffer.as_ref(),
559 &[0, third_max as i32, (third_max * 2) as i32]
560 );
561 }
562
563 #[test]
568 fn has_non_empty_nulls_none_null_buffer() {
569 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
571 assert!(!offsets.has_non_empty_nulls(None));
572 }
573
574 #[test]
575 fn has_non_empty_nulls_all_valid() {
576 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
578 let nulls = NullBuffer::new_valid(3);
579 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
580 }
581
582 #[test]
583 fn has_non_empty_nulls_all_null_empty_offsets() {
584 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 0, 0]));
586 let nulls = NullBuffer::new_null(3);
587 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
588 }
589
590 #[test]
591 fn has_non_empty_nulls_all_null_non_empty_offsets() {
592 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 5, 7]));
594 let nulls = NullBuffer::new_null(3);
595 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
596 }
597
598 #[test]
599 fn has_non_empty_nulls_all_null_nonzero_but_equal_offsets() {
600 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![5, 5, 5]));
602 let nulls = NullBuffer::new_null(2);
603 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
604 }
605
606 #[test]
607 fn has_non_empty_nulls_leading_nulls_with_data() {
608 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
612 let nulls = NullBuffer::from(vec![false, true, true]);
613 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
614 }
615
616 #[test]
617 fn has_non_empty_nulls_leading_nulls_without_data() {
618 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6]));
622 let nulls = NullBuffer::from(vec![false, true, true]);
623 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
624 }
625
626 #[test]
627 fn has_non_empty_nulls_only_trailing_null_has_data() {
628 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6, 8]));
632 let nulls = NullBuffer::from(vec![false, true, true, false]);
633 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
634 }
635
636 #[test]
637 fn has_non_empty_nulls_trailing_nulls_without_data() {
638 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6, 6]));
642 let nulls = NullBuffer::from(vec![true, true, false]);
643 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
644 }
645
646 #[test]
647 fn has_non_empty_nulls_middle_nulls_with_data() {
648 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10]));
652 let nulls = NullBuffer::from(vec![true, false, true]);
653 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
654 }
655
656 #[test]
657 fn has_non_empty_nulls_middle_nulls_without_data() {
658 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6]));
662 let nulls = NullBuffer::from(vec![true, false, true]);
663 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
664 }
665
666 #[test]
667 fn has_non_empty_nulls_alternating_null_valid_all_empty() {
668 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6]));
672 let nulls = NullBuffer::from(vec![false, true, false, true, false]);
673 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
674
675 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6, 9]));
677 let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
678 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
679 }
680
681 #[test]
682 fn has_non_empty_nulls_multiple_null_regions_second_has_data() {
683 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 5, 6]));
687 let nulls = NullBuffer::from(vec![false, true, false, true]);
688 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
689 }
690
691 #[test]
692 fn has_non_empty_nulls_multiple_null_regions_later_gap_has_data() {
693 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 8, 10]));
700 let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
701 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
702 }
703
704 #[test]
705 fn has_non_empty_nulls_single_element_null_empty() {
706 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0]));
708 let nulls = NullBuffer::new_null(1);
709 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
710 }
711
712 #[test]
713 fn has_non_empty_nulls_single_element_null_non_empty() {
714 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
716 let nulls = NullBuffer::new_null(1);
717 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
718 }
719
720 #[test]
721 fn has_non_empty_nulls_single_element_valid() {
722 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
724 let nulls = NullBuffer::new_valid(1);
725 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
726 }
727
728 #[test]
729 fn has_non_empty_nulls_consecutive_nulls_between_valid_slices() {
730 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 2, 2, 5, 8]));
735 let nulls = NullBuffer::from(vec![true, false, false, true, true]);
736 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
737 }
738
739 #[test]
740 fn has_non_empty_nulls_consecutive_nulls_between_valid_slices_with_data() {
741 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 4, 5, 8]));
748 let nulls = NullBuffer::from(vec![true, false, false, true, true]);
749 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
750 }
751
752 #[test]
753 fn has_non_empty_nulls_nonzero_initial_offset_all_null_equal() {
754 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 10, 10]));
756 let nulls = NullBuffer::new_null(2);
757 assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
758 }
759
760 #[test]
761 fn has_non_empty_nulls_nonzero_initial_offset_with_data() {
762 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 15, 20]));
766 let nulls = NullBuffer::from(vec![false, true]);
767 assert!(offsets.has_non_empty_nulls(Some(&nulls)));
768 }
769
770 #[test]
771 fn has_non_empty_nulls_sliced_no_nulls_in_null_region() {
772 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6, 6, 9]));
776 let sliced = offsets.slice(1, 3);
777 let nulls = NullBuffer::from(vec![false, true, false]);
778 assert!(!sliced.has_non_empty_nulls(Some(&nulls)));
779 }
780
781 #[test]
782 fn has_non_empty_nulls_sliced_null_has_data() {
783 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10, 15]));
787 let sliced = offsets.slice(1, 2);
788 let nulls = NullBuffer::from(vec![false, true]);
789 assert!(sliced.has_non_empty_nulls(Some(&nulls)));
790 }
791
792 #[test]
793 #[should_panic(
794 expected = "The length of the offsets should be 1 more than the length of the null buffer"
795 )]
796 fn has_non_empty_nulls_all_valid_mismatched_lengths_too_short() {
797 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
799 let nulls = NullBuffer::new_valid(2); offsets.has_non_empty_nulls(Some(&nulls));
801 }
802
803 #[test]
804 #[should_panic(
805 expected = "The length of the offsets should be 1 more than the length of the null buffer"
806 )]
807 fn has_non_empty_nulls_all_valid_mismatched_lengths_too_long() {
808 let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
810 let nulls = NullBuffer::new_valid(5); offsets.has_non_empty_nulls(Some(&nulls));
812 }
813}