1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33pub struct RunArray<R: RunEndIndexType> {
66 data_type: DataType,
67 run_ends: RunEndBuffer<R::Native>,
68 values: ArrayRef,
69}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72 fn clone(&self) -> Self {
73 Self {
74 data_type: self.data_type.clone(),
75 run_ends: self.run_ends.clone(),
76 values: self.values.clone(),
77 }
78 }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85 let len = run_ends.len();
86 if len == 0 {
87 return 0;
88 }
89 run_ends.value(len - 1).as_usize()
90 }
91
92 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100 let run_ends_type = run_ends.data_type().clone();
101 let values_type = values.data_type().clone();
102 let ree_array_type = DataType::RunEndEncoded(
103 Arc::new(Field::new("run_ends", run_ends_type, false)),
104 Arc::new(Field::new("values", values_type, true)),
105 );
106 let len = RunArray::logical_len(run_ends);
107 let builder = ArrayDataBuilder::new(ree_array_type)
108 .len(len)
109 .add_child_data(run_ends.to_data())
110 .add_child_data(values.to_data());
111
112 let array_data = unsafe { builder.build_unchecked() };
114
115 array_data.validate_data()?;
122
123 Ok(array_data.into())
124 }
125
126 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
128 &self.run_ends
129 }
130
131 pub fn values(&self) -> &ArrayRef {
136 &self.values
137 }
138
139 pub fn get_start_physical_index(&self) -> usize {
143 self.run_ends.get_start_physical_index()
144 }
145
146 pub fn get_end_physical_index(&self) -> usize {
150 self.run_ends.get_end_physical_index()
151 }
152
153 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
166 let values = self.values.as_any().downcast_ref()?;
167 Some(TypedRunArray {
168 run_array: self,
169 values,
170 })
171 }
172
173 pub fn get_physical_index(&self, logical_index: usize) -> usize {
177 self.run_ends.get_physical_index(logical_index)
178 }
179
180 #[inline]
184 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
185 where
186 I: ArrowNativeType,
187 {
188 self.run_ends()
189 .get_physical_indices(logical_indices)
190 .map_err(|index| {
191 ArrowError::InvalidArgumentError(format!(
192 "Logical index {} is out of bounds for RunArray of length {}",
193 index.as_usize(),
194 self.len()
195 ))
196 })
197 }
198
199 pub fn slice(&self, offset: usize, length: usize) -> Self {
205 Self {
206 data_type: self.data_type.clone(),
207 run_ends: self.run_ends.slice(offset, length),
208 values: self.values.clone(),
209 }
210 }
211}
212
213impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
214 fn from(data: ArrayData) -> Self {
216 match data.data_type() {
217 DataType::RunEndEncoded(_, _) => {}
218 _ => {
219 panic!(
220 "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"
221 );
222 }
223 }
224
225 let child = &data.child_data()[0];
228 assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
229 let run_ends = unsafe {
230 let scalar = child.buffers()[0].clone().into();
231 RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
232 };
233
234 let values = make_array(data.child_data()[1].clone());
235 Self {
236 data_type: data.data_type().clone(),
237 run_ends,
238 values,
239 }
240 }
241}
242
243impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
244 fn from(array: RunArray<R>) -> Self {
245 let len = array.run_ends.len();
246 let offset = array.run_ends.offset();
247
248 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
249 .len(array.run_ends.values().len())
250 .buffers(vec![array.run_ends.into_inner().into_inner()]);
251
252 let run_ends = unsafe { run_ends.build_unchecked() };
253
254 let builder = ArrayDataBuilder::new(array.data_type)
255 .len(len)
256 .offset(offset)
257 .child_data(vec![run_ends, array.values.to_data()]);
258
259 unsafe { builder.build_unchecked() }
260 }
261}
262
263unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
265 fn as_any(&self) -> &dyn Any {
266 self
267 }
268
269 fn to_data(&self) -> ArrayData {
270 self.clone().into()
271 }
272
273 fn into_data(self) -> ArrayData {
274 self.into()
275 }
276
277 fn data_type(&self) -> &DataType {
278 &self.data_type
279 }
280
281 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
282 Arc::new(self.slice(offset, length))
283 }
284
285 fn len(&self) -> usize {
286 self.run_ends.len()
287 }
288
289 fn is_empty(&self) -> bool {
290 self.run_ends.is_empty()
291 }
292
293 fn shrink_to_fit(&mut self) {
294 self.run_ends.shrink_to_fit();
295 self.values.shrink_to_fit();
296 }
297
298 fn offset(&self) -> usize {
299 self.run_ends.offset()
300 }
301
302 fn nulls(&self) -> Option<&NullBuffer> {
303 None
304 }
305
306 fn logical_nulls(&self) -> Option<NullBuffer> {
307 let len = self.len();
308 let nulls = self.values.logical_nulls()?;
309 let mut out = BooleanBufferBuilder::new(len);
310 let offset = self.run_ends.offset();
311 let mut valid_start = 0;
312 let mut last_end = 0;
313 for (idx, end) in self.run_ends.values().iter().enumerate() {
314 let end = end.as_usize();
315 if end < offset {
316 continue;
317 }
318 let end = (end - offset).min(len);
319 if nulls.is_null(idx) {
320 if valid_start < last_end {
321 out.append_n(last_end - valid_start, true);
322 }
323 out.append_n(end - last_end, false);
324 valid_start = end;
325 }
326 last_end = end;
327 if end == len {
328 break;
329 }
330 }
331 if valid_start < len {
332 out.append_n(len - valid_start, true)
333 }
334 assert_eq!(out.len(), len);
336 Some(out.finish().into())
337 }
338
339 fn is_nullable(&self) -> bool {
340 !self.is_empty() && self.values.is_nullable()
341 }
342
343 fn get_buffer_memory_size(&self) -> usize {
344 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
345 }
346
347 fn get_array_memory_size(&self) -> usize {
348 std::mem::size_of::<Self>()
349 + self.run_ends.inner().inner().capacity()
350 + self.values.get_array_memory_size()
351 }
352}
353
354impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
355 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
356 writeln!(
357 f,
358 "RunArray {{run_ends: {:?}, values: {:?}}}",
359 self.run_ends.values(),
360 self.values
361 )
362 }
363}
364
365impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
382 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
383 let it = iter.into_iter();
384 let (lower, _) = it.size_hint();
385 let mut builder = StringRunBuilder::with_capacity(lower, 256);
386 it.for_each(|i| {
387 builder.append_option(i);
388 });
389
390 builder.finish()
391 }
392}
393
394impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
409 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
410 let it = iter.into_iter();
411 let (lower, _) = it.size_hint();
412 let mut builder = StringRunBuilder::with_capacity(lower, 256);
413 it.for_each(|i| {
414 builder.append_value(i);
415 });
416
417 builder.finish()
418 }
419}
420
421pub type Int16RunArray = RunArray<Int16Type>;
435
436pub type Int32RunArray = RunArray<Int32Type>;
450
451pub type Int64RunArray = RunArray<Int64Type>;
465
466pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
484 run_array: &'a RunArray<R>,
486
487 values: &'a V,
489}
490
491impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
493 fn clone(&self) -> Self {
494 *self
495 }
496}
497
498impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
499
500impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
501 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
502 writeln!(f, "TypedRunArray({:?})", self.run_array)
503 }
504}
505
506impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
507 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
509 self.run_array.run_ends()
510 }
511
512 pub fn values(&self) -> &'a V {
514 self.values
515 }
516
517 pub fn run_array(&self) -> &'a RunArray<R> {
519 self.run_array
520 }
521}
522
523unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
525 fn as_any(&self) -> &dyn Any {
526 self.run_array
527 }
528
529 fn to_data(&self) -> ArrayData {
530 self.run_array.to_data()
531 }
532
533 fn into_data(self) -> ArrayData {
534 self.run_array.into_data()
535 }
536
537 fn data_type(&self) -> &DataType {
538 self.run_array.data_type()
539 }
540
541 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
542 Arc::new(self.run_array.slice(offset, length))
543 }
544
545 fn len(&self) -> usize {
546 self.run_array.len()
547 }
548
549 fn is_empty(&self) -> bool {
550 self.run_array.is_empty()
551 }
552
553 fn offset(&self) -> usize {
554 self.run_array.offset()
555 }
556
557 fn nulls(&self) -> Option<&NullBuffer> {
558 self.run_array.nulls()
559 }
560
561 fn logical_nulls(&self) -> Option<NullBuffer> {
562 self.run_array.logical_nulls()
563 }
564
565 fn logical_null_count(&self) -> usize {
566 self.run_array.logical_null_count()
567 }
568
569 fn is_nullable(&self) -> bool {
570 self.run_array.is_nullable()
571 }
572
573 fn get_buffer_memory_size(&self) -> usize {
574 self.run_array.get_buffer_memory_size()
575 }
576
577 fn get_array_memory_size(&self) -> usize {
578 self.run_array.get_array_memory_size()
579 }
580}
581
582impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
585where
586 R: RunEndIndexType,
587 V: Sync + Send,
588 &'a V: ArrayAccessor,
589 <&'a V as ArrayAccessor>::Item: Default,
590{
591 type Item = <&'a V as ArrayAccessor>::Item;
592
593 fn value(&self, logical_index: usize) -> Self::Item {
594 assert!(
595 logical_index < self.len(),
596 "Trying to access an element at index {} from a TypedRunArray of length {}",
597 logical_index,
598 self.len()
599 );
600 unsafe { self.value_unchecked(logical_index) }
601 }
602
603 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
604 let physical_index = self.run_array.get_physical_index(logical_index);
605 unsafe { self.values().value_unchecked(physical_index) }
606 }
607}
608
609impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
610where
611 R: RunEndIndexType,
612 V: Sync + Send,
613 &'a V: ArrayAccessor,
614 <&'a V as ArrayAccessor>::Item: Default,
615{
616 type Item = Option<<&'a V as ArrayAccessor>::Item>;
617 type IntoIter = RunArrayIter<'a, R, V>;
618
619 fn into_iter(self) -> Self::IntoIter {
620 RunArrayIter::new(self)
621 }
622}
623
624#[cfg(test)]
625mod tests {
626 use rand::Rng;
627 use rand::rng;
628 use rand::seq::SliceRandom;
629
630 use super::*;
631 use crate::builder::PrimitiveRunBuilder;
632 use crate::cast::AsArray;
633 use crate::types::{Int8Type, UInt32Type};
634 use crate::{Int16Array, Int32Array, StringArray};
635
636 fn build_input_array(size: usize) -> Vec<Option<i32>> {
637 let mut seed: Vec<Option<i32>> = vec![
640 None,
641 None,
642 None,
643 Some(1),
644 Some(2),
645 Some(3),
646 Some(4),
647 Some(5),
648 Some(6),
649 Some(7),
650 Some(8),
651 Some(9),
652 ];
653 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
654 let mut ix = 0;
655 let mut rng = rng();
656 let max_run_length = 8_usize.min(1_usize.max(size / 2));
658 while result.len() < size {
659 if ix == 0 {
661 seed.shuffle(&mut rng);
662 }
663 let num = max_run_length.min(rng.random_range(1..=max_run_length));
665 for _ in 0..num {
666 result.push(seed[ix]);
667 }
668 ix += 1;
669 if ix == seed.len() {
670 ix = 0
671 }
672 }
673 result.resize(size, None);
674 result
675 }
676
677 fn compare_logical_and_physical_indices(
679 logical_indices: &[u32],
680 logical_array: &[Option<i32>],
681 physical_indices: &[usize],
682 physical_array: &PrimitiveArray<Int32Type>,
683 ) {
684 assert_eq!(logical_indices.len(), physical_indices.len());
685
686 logical_indices
688 .iter()
689 .map(|f| f.as_usize())
690 .zip(physical_indices.iter())
691 .for_each(|(logical_ix, physical_ix)| {
692 let expected = logical_array[logical_ix];
693 match expected {
694 Some(val) => {
695 assert!(physical_array.is_valid(*physical_ix));
696 let actual = physical_array.value(*physical_ix);
697 assert_eq!(val, actual);
698 }
699 None => {
700 assert!(physical_array.is_null(*physical_ix))
701 }
702 };
703 });
704 }
705 #[test]
706 fn test_run_array() {
707 let value_data =
709 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
710
711 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
713 let run_ends_data =
714 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
715
716 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
718
719 assert_eq!(ree_array.len(), 22);
720 assert_eq!(ree_array.null_count(), 0);
721
722 let values = ree_array.values();
723 assert_eq!(value_data.into_data(), values.to_data());
724 assert_eq!(&DataType::Int8, values.data_type());
725
726 let run_ends = ree_array.run_ends();
727 assert_eq!(run_ends.values(), &run_ends_values);
728 }
729
730 #[test]
731 fn test_run_array_fmt_debug() {
732 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
733 builder.append_value(12345678);
734 builder.append_null();
735 builder.append_value(22345678);
736 let array = builder.finish();
737 assert_eq!(
738 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
739 format!("{array:?}")
740 );
741
742 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
743 for _ in 0..20 {
744 builder.append_value(1);
745 }
746 let array = builder.finish();
747
748 assert_eq!(array.len(), 20);
749 assert_eq!(array.null_count(), 0);
750 assert_eq!(array.logical_null_count(), 0);
751
752 assert_eq!(
753 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
754 format!("{array:?}")
755 );
756 }
757
758 #[test]
759 fn test_run_array_from_iter() {
760 let test = vec!["a", "a", "b", "c"];
761 let array: RunArray<Int16Type> = test
762 .iter()
763 .map(|&x| if x == "b" { None } else { Some(x) })
764 .collect();
765 assert_eq!(
766 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
767 format!("{array:?}")
768 );
769
770 assert_eq!(array.len(), 4);
771 assert_eq!(array.null_count(), 0);
772 assert_eq!(array.logical_null_count(), 1);
773
774 let array: RunArray<Int16Type> = test.into_iter().collect();
775 assert_eq!(
776 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
777 format!("{array:?}")
778 );
779 }
780
781 #[test]
782 fn test_run_array_run_ends_as_primitive_array() {
783 let test = vec!["a", "b", "c", "a"];
784 let array: RunArray<Int16Type> = test.into_iter().collect();
785
786 assert_eq!(array.len(), 4);
787 assert_eq!(array.null_count(), 0);
788 assert_eq!(array.logical_null_count(), 0);
789
790 let run_ends = array.run_ends();
791 assert_eq!(&[1, 2, 3, 4], run_ends.values());
792 }
793
794 #[test]
795 fn test_run_array_as_primitive_array_with_null() {
796 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
797 let array: RunArray<Int32Type> = test.into_iter().collect();
798
799 assert_eq!(array.len(), 6);
800 assert_eq!(array.null_count(), 0);
801 assert_eq!(array.logical_null_count(), 3);
802
803 let run_ends = array.run_ends();
804 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
805
806 let values_data = array.values();
807 assert_eq!(2, values_data.null_count());
808 assert_eq!(5, values_data.len());
809 }
810
811 #[test]
812 fn test_run_array_all_nulls() {
813 let test = vec![None, None, None];
814 let array: RunArray<Int32Type> = test.into_iter().collect();
815
816 assert_eq!(array.len(), 3);
817 assert_eq!(array.null_count(), 0);
818 assert_eq!(array.logical_null_count(), 3);
819
820 let run_ends = array.run_ends();
821 assert_eq!(3, run_ends.len());
822 assert_eq!(&[3], run_ends.values());
823
824 let values_data = array.values();
825 assert_eq!(1, values_data.null_count());
826 }
827
828 #[test]
829 fn test_run_array_try_new() {
830 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
831 .into_iter()
832 .collect();
833 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
834
835 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
836 assert_eq!(array.values().data_type(), &DataType::Utf8);
837
838 assert_eq!(array.null_count(), 0);
839 assert_eq!(array.logical_null_count(), 1);
840 assert_eq!(array.len(), 4);
841 assert_eq!(array.values().null_count(), 1);
842
843 assert_eq!(
844 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
845 format!("{array:?}")
846 );
847 }
848
849 #[test]
850 fn test_run_array_int16_type_definition() {
851 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
852 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
853 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
854 assert_eq!(array.values(), &values);
855 }
856
857 #[test]
858 fn test_run_array_empty_string() {
859 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
860 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
861 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
862 assert_eq!(array.values(), &values);
863 }
864
865 #[test]
866 fn test_run_array_length_mismatch() {
867 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
868 .into_iter()
869 .collect();
870 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
871
872 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
873 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
874 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
875 }
876
877 #[test]
878 fn test_run_array_run_ends_with_null() {
879 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
880 .into_iter()
881 .collect();
882 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
883
884 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
885 let expected = ArrowError::InvalidArgumentError(
886 "Found null values in run_ends array. The run_ends array should not have null values."
887 .to_string(),
888 );
889 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
890 }
891
892 #[test]
893 fn test_run_array_run_ends_with_zeroes() {
894 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
895 .into_iter()
896 .collect();
897 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
898
899 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
900 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
901 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
902 }
903
904 #[test]
905 fn test_run_array_run_ends_non_increasing() {
906 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
907 .into_iter()
908 .collect();
909 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
910
911 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
912 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
913 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
914 }
915
916 #[test]
917 #[should_panic(expected = "Incorrect run ends type")]
918 fn test_run_array_run_ends_data_type_mismatch() {
919 let a = RunArray::<Int32Type>::from_iter(["32"]);
920 let _ = RunArray::<Int64Type>::from(a.into_data());
921 }
922
923 #[test]
924 fn test_ree_array_accessor() {
925 let input_array = build_input_array(256);
926
927 let mut builder =
929 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
930 builder.extend(input_array.iter().copied());
931 let run_array = builder.finish();
932 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
933
934 for (i, inp_val) in input_array.iter().enumerate() {
936 if let Some(val) = inp_val {
937 let actual = typed.value(i);
938 assert_eq!(*val, actual)
939 } else {
940 let physical_ix = run_array.get_physical_index(i);
941 assert!(typed.values().is_null(physical_ix));
942 };
943 }
944 }
945
946 #[test]
947 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
949 for logical_len in (0..250).step_by(10) {
951 let input_array = build_input_array(logical_len);
952
953 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
955 builder.extend(input_array.clone().into_iter());
956
957 let run_array = builder.finish();
958 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
959
960 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
962 logical_indices.append(&mut logical_indices.clone());
964 let mut rng = rng();
965 logical_indices.shuffle(&mut rng);
966
967 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
968
969 assert_eq!(logical_indices.len(), physical_indices.len());
970
971 compare_logical_and_physical_indices(
973 &logical_indices,
974 &input_array,
975 &physical_indices,
976 physical_values_array,
977 );
978 }
979 }
980
981 #[test]
982 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
984 let total_len = 80;
985 let input_array = build_input_array(total_len);
986
987 let mut builder =
989 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
990 builder.extend(input_array.iter().copied());
991 let run_array = builder.finish();
992 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
993
994 for slice_len in 1..=total_len {
996 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
998 logical_indices.append(&mut logical_indices.clone());
1000 let mut rng = rng();
1001 logical_indices.shuffle(&mut rng);
1002
1003 let sliced_input_array = &input_array[0..slice_len];
1006
1007 let sliced_run_array: RunArray<Int16Type> =
1009 run_array.slice(0, slice_len).into_data().into();
1010
1011 let physical_indices = sliced_run_array
1013 .get_physical_indices(&logical_indices)
1014 .unwrap();
1015
1016 compare_logical_and_physical_indices(
1017 &logical_indices,
1018 sliced_input_array,
1019 &physical_indices,
1020 physical_values_array,
1021 );
1022
1023 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1026
1027 let sliced_run_array: RunArray<Int16Type> = run_array
1029 .slice(total_len - slice_len, slice_len)
1030 .into_data()
1031 .into();
1032
1033 let physical_indices = sliced_run_array
1035 .get_physical_indices(&logical_indices)
1036 .unwrap();
1037
1038 compare_logical_and_physical_indices(
1039 &logical_indices,
1040 sliced_input_array,
1041 &physical_indices,
1042 physical_values_array,
1043 );
1044 }
1045 }
1046
1047 #[test]
1048 fn test_logical_nulls() {
1049 let run = Int32Array::from(vec![3, 6, 9, 12]);
1050 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1051 let array = RunArray::try_new(&run, &values).unwrap();
1052
1053 let expected = [
1054 true, true, true, false, false, false, true, true, true, false, false, false,
1055 ];
1056
1057 let n = array.logical_nulls().unwrap();
1058 assert_eq!(n.null_count(), 6);
1059
1060 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1061 for (offset, length) in slices {
1062 let a = array.slice(offset, length);
1063 let n = a.logical_nulls().unwrap();
1064 let n = n.into_iter().collect::<Vec<_>>();
1065 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1066 }
1067 }
1068
1069 #[test]
1070 fn test_run_array_eq_identical() {
1071 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1072 let values1 = StringArray::from(vec!["a", "b", "c"]);
1073 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1074
1075 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1076 let values2 = StringArray::from(vec!["a", "b", "c"]);
1077 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1078
1079 assert_eq!(array1, array2);
1080 }
1081
1082 #[test]
1083 fn test_run_array_ne_different_run_ends() {
1084 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1085 let values1 = StringArray::from(vec!["a", "b", "c"]);
1086 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1087
1088 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1089 let values2 = StringArray::from(vec!["a", "b", "c"]);
1090 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1091
1092 assert_ne!(array1, array2);
1093 }
1094
1095 #[test]
1096 fn test_run_array_ne_different_values() {
1097 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1098 let values1 = StringArray::from(vec!["a", "b", "c"]);
1099 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1100
1101 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1102 let values2 = StringArray::from(vec!["a", "b", "d"]);
1103 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1104
1105 assert_ne!(array1, array2);
1106 }
1107
1108 #[test]
1109 fn test_run_array_eq_with_nulls() {
1110 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1111 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1112 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1113
1114 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1115 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1116 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1117
1118 assert_eq!(array1, array2);
1119 }
1120
1121 #[test]
1122 fn test_run_array_eq_different_run_end_types() {
1123 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1124 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1125 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1126
1127 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1128 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1129 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1130
1131 assert_eq!(array_i16_1, array_i16_2);
1132 }
1133}