1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
/// An array stored as runs: a buffer of run ends plus a child array of values.
///
/// `R` selects the integer type used for the run ends. Each entry of `values`
/// is paired with the run end at the same physical index, and the last run end
/// gives the logical length of the unsliced array.
pub struct RunArray<R: RunEndIndexType> {
    /// Data type reported by this array; the safe constructors in this file
    /// always produce a `DataType::RunEndEncoded` here.
    data_type: DataType,
    /// Run-end buffer; it also carries the logical offset and length of this
    /// (possibly sliced) array.
    run_ends: RunEndBuffer<R::Native>,
    /// Physical values, one per run. Slicing the array does not slice this child.
    values: ArrayRef,
}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72 fn clone(&self) -> Self {
73 Self {
74 data_type: self.data_type.clone(),
75 run_ends: self.run_ends.clone(),
76 values: self.values.clone(),
77 }
78 }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85 let len = run_ends.len();
86 if len == 0 {
87 return 0;
88 }
89 run_ends.value(len - 1).as_usize()
90 }
91
92 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100 let run_ends_type = run_ends.data_type().clone();
101 let values_type = values.data_type().clone();
102 let ree_array_type = DataType::RunEndEncoded(
103 Arc::new(Field::new("run_ends", run_ends_type, false)),
104 Arc::new(Field::new("values", values_type, true)),
105 );
106 let len = RunArray::logical_len(run_ends);
107 let builder = ArrayDataBuilder::new(ree_array_type)
108 .len(len)
109 .add_child_data(run_ends.to_data())
110 .add_child_data(values.to_data());
111
112 let array_data = unsafe { builder.build_unchecked() };
114
115 array_data.validate_data()?;
122
123 Ok(array_data.into())
124 }
125
126 pub unsafe fn new_unchecked(
132 data_type: DataType,
133 run_ends: RunEndBuffer<R::Native>,
134 values: ArrayRef,
135 ) -> Self {
136 if cfg!(feature = "force_validate") {
137 match &data_type {
138 DataType::RunEndEncoded(run_ends, values_field) => {
139 assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
140 assert_eq!(
141 run_ends.data_type(),
142 &R::DATA_TYPE,
143 "Incorrect run ends type"
144 );
145 assert_eq!(
146 values_field.data_type(),
147 values.data_type(),
148 "Incorrect values type"
149 );
150 }
151 _ => {
152 panic!(
153 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
154 );
155 }
156 }
157
158 let run_array = Self {
159 data_type,
160 run_ends,
161 values,
162 };
163
164 run_array
171 .to_data()
172 .validate_data()
173 .expect("RunArray data should be valid");
174
175 return run_array;
176 }
177
178 Self {
179 data_type,
180 run_ends,
181 values,
182 }
183 }
184
185 pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
187 (self.data_type, self.run_ends, self.values)
188 }
189
190 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
192 &self.run_ends
193 }
194
195 pub fn values(&self) -> &ArrayRef {
200 &self.values
201 }
202
203 pub fn values_slice(&self) -> ArrayRef {
208 if self.is_empty() {
209 return self.values.slice(0, 0);
210 }
211 let start = self.get_start_physical_index();
212 let end = self.get_end_physical_index();
213 self.values.slice(start, end - start + 1)
214 }
215
216 pub fn get_start_physical_index(&self) -> usize {
220 self.run_ends.get_start_physical_index()
221 }
222
223 pub fn get_end_physical_index(&self) -> usize {
227 self.run_ends.get_end_physical_index()
228 }
229
230 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
243 let values = self.values.as_any().downcast_ref()?;
244 Some(TypedRunArray {
245 run_array: self,
246 values,
247 })
248 }
249
250 pub fn get_physical_index(&self, logical_index: usize) -> usize {
254 self.run_ends.get_physical_index(logical_index)
255 }
256
257 #[inline]
261 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
262 where
263 I: ArrowNativeType,
264 {
265 self.run_ends()
266 .get_physical_indices(logical_indices)
267 .map_err(|index| {
268 ArrowError::InvalidArgumentError(format!(
269 "Logical index {} is out of bounds for RunArray of length {}",
270 index.as_usize(),
271 self.len()
272 ))
273 })
274 }
275
276 pub fn slice(&self, offset: usize, length: usize) -> Self {
282 Self {
283 data_type: self.data_type.clone(),
284 run_ends: self.run_ends.slice(offset, length),
285 values: self.values.clone(),
286 }
287 }
288}
289
290impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
291 fn from(data: ArrayData) -> Self {
293 let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
294
295 match &data_type {
296 DataType::RunEndEncoded(_, _) => {}
297 _ => {
298 panic!(
299 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
300 );
301 }
302 }
303
304 let [run_end_child, values_child]: [ArrayData; 2] = child_data
305 .try_into()
306 .expect("RunArray data should have exactly two child arrays");
307
308 let (
310 run_end_data_type,
311 _run_end_len,
312 _run_end_nulls,
313 _run_end_offset,
314 run_end_buffers,
315 _run_end_child_data,
316 ) = run_end_child.into_parts();
317 assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
318 let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
319 .try_into()
320 .expect("Run ends should have exactly one buffer");
321 let scalar = ScalarBuffer::from(run_end_buffer);
322 let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
323
324 let values = make_array(values_child);
325
326 Self {
327 data_type,
328 run_ends,
329 values,
330 }
331 }
332}
333
334impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
335 fn from(array: RunArray<R>) -> Self {
336 let len = array.run_ends.len();
337 let offset = array.run_ends.offset();
338
339 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
340 .len(array.run_ends.values().len())
341 .buffers(vec![array.run_ends.into_inner().into_inner()]);
342
343 let run_ends = unsafe { run_ends.build_unchecked() };
344
345 let builder = ArrayDataBuilder::new(array.data_type)
346 .len(len)
347 .offset(offset)
348 .child_data(vec![run_ends, array.values.to_data()]);
349
350 unsafe { builder.build_unchecked() }
351 }
352}
353
354unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
356 fn as_any(&self) -> &dyn Any {
357 self
358 }
359
360 fn to_data(&self) -> ArrayData {
361 self.clone().into()
362 }
363
364 fn into_data(self) -> ArrayData {
365 self.into()
366 }
367
368 fn data_type(&self) -> &DataType {
369 &self.data_type
370 }
371
372 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
373 Arc::new(self.slice(offset, length))
374 }
375
376 fn len(&self) -> usize {
377 self.run_ends.len()
378 }
379
380 fn is_empty(&self) -> bool {
381 self.run_ends.is_empty()
382 }
383
384 fn shrink_to_fit(&mut self) {
385 self.run_ends.shrink_to_fit();
386 self.values.shrink_to_fit();
387 }
388
389 fn offset(&self) -> usize {
390 self.run_ends.offset()
391 }
392
393 fn nulls(&self) -> Option<&NullBuffer> {
394 None
395 }
396
397 fn logical_nulls(&self) -> Option<NullBuffer> {
398 let len = self.len();
399 let nulls = self.values.logical_nulls()?;
400 let mut out = BooleanBufferBuilder::new(len);
401 let offset = self.run_ends.offset();
402 let mut valid_start = 0;
403 let mut last_end = 0;
404 for (idx, end) in self.run_ends.values().iter().enumerate() {
405 let end = end.as_usize();
406 if end < offset {
407 continue;
408 }
409 let end = (end - offset).min(len);
410 if nulls.is_null(idx) {
411 if valid_start < last_end {
412 out.append_n(last_end - valid_start, true);
413 }
414 out.append_n(end - last_end, false);
415 valid_start = end;
416 }
417 last_end = end;
418 if end == len {
419 break;
420 }
421 }
422 if valid_start < len {
423 out.append_n(len - valid_start, true)
424 }
425 assert_eq!(out.len(), len);
427 Some(out.finish().into())
428 }
429
430 fn is_nullable(&self) -> bool {
431 !self.is_empty() && self.values.is_nullable()
432 }
433
434 fn get_buffer_memory_size(&self) -> usize {
435 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
436 }
437
438 fn get_array_memory_size(&self) -> usize {
439 std::mem::size_of::<Self>()
440 + self.run_ends.inner().inner().capacity()
441 + self.values.get_array_memory_size()
442 }
443
444 #[cfg(feature = "pool")]
445 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
446 self.run_ends.claim(pool);
447 self.values.claim(pool);
448 }
449}
450
451impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
452 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
453 writeln!(
454 f,
455 "RunArray {{run_ends: {:?}, values: {:?}}}",
456 self.run_ends.values(),
457 self.values
458 )
459 }
460}
461
462impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
479 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
480 let it = iter.into_iter();
481 let (lower, _) = it.size_hint();
482 let mut builder = StringRunBuilder::with_capacity(lower, 256);
483 it.for_each(|i| {
484 builder.append_option(i);
485 });
486
487 builder.finish()
488 }
489}
490
491impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
506 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
507 let it = iter.into_iter();
508 let (lower, _) = it.size_hint();
509 let mut builder = StringRunBuilder::with_capacity(lower, 256);
510 it.for_each(|i| {
511 builder.append_value(i);
512 });
513
514 builder.finish()
515 }
516}
517
/// A [`RunArray`] whose run ends use [`Int16Type`].
pub type Int16RunArray = RunArray<Int16Type>;

/// A [`RunArray`] whose run ends use [`Int32Type`].
pub type Int32RunArray = RunArray<Int32Type>;

/// A [`RunArray`] whose run ends use [`Int64Type`].
pub type Int64RunArray = RunArray<Int64Type>;
562
/// A borrowed view of a [`RunArray`] paired with its values child downcast to
/// the concrete type `V`.
///
/// Instances are produced by [`RunArray::downcast`].
pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
    /// The run array this view was created from.
    run_array: &'a RunArray<R>,

    /// The values child of `run_array`, downcast to `V`.
    values: &'a V,
}
587
// Implemented by hand so that no `Clone` bound is placed on `R` or `V`; the
// struct only holds two shared references.
impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
    /// Returns a bitwise copy (the type is `Copy`).
    fn clone(&self) -> Self {
        *self
    }
}
594
// The view consists solely of shared references, so it can be `Copy`
// regardless of `R` and `V`.
impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
596
597impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
598 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
599 writeln!(f, "TypedRunArray({:?})", self.run_array)
600 }
601}
602
impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
    /// Returns the run-end buffer of the wrapped run array.
    ///
    /// The reference carries the view's `'a` lifetime rather than the lifetime
    /// of `&self`.
    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
        self.run_array.run_ends()
    }

    /// Returns the values child, already downcast to `V`.
    pub fn values(&self) -> &'a V {
        self.values
    }

    /// Returns the wrapped, untyped [`RunArray`].
    pub fn run_array(&self) -> &'a RunArray<R> {
        self.run_array
    }
}
619
// Every method below forwards to the wrapped `RunArray`; the typed view adds
// no state of its own.
unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
    /// Returns the wrapped `RunArray` (not the typed view) as `&dyn Any`.
    fn as_any(&self) -> &dyn Any {
        self.run_array
    }

    /// Returns the `ArrayData` of the wrapped run array.
    fn to_data(&self) -> ArrayData {
        self.run_array.to_data()
    }

    /// Consumes the view and returns the `ArrayData` of the wrapped run array.
    fn into_data(self) -> ArrayData {
        self.run_array.into_data()
    }

    /// Returns the data type of the wrapped run array.
    fn data_type(&self) -> &DataType {
        self.run_array.data_type()
    }

    /// Slices the wrapped run array; the result is an untyped `RunArray` behind
    /// an `Arc`, not another `TypedRunArray`.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.run_array.slice(offset, length))
    }

    /// Logical length of the wrapped run array.
    fn len(&self) -> usize {
        self.run_array.len()
    }

    /// Whether the wrapped run array is empty.
    fn is_empty(&self) -> bool {
        self.run_array.is_empty()
    }

    /// Logical offset of the wrapped run array.
    fn offset(&self) -> usize {
        self.run_array.offset()
    }

    /// Null buffer of the wrapped run array.
    fn nulls(&self) -> Option<&NullBuffer> {
        self.run_array.nulls()
    }

    /// Logical nulls of the wrapped run array.
    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.run_array.logical_nulls()
    }

    /// Logical null count of the wrapped run array.
    fn logical_null_count(&self) -> usize {
        self.run_array.logical_null_count()
    }

    /// Nullability of the wrapped run array.
    fn is_nullable(&self) -> bool {
        self.run_array.is_nullable()
    }

    /// Buffer memory size of the wrapped run array.
    fn get_buffer_memory_size(&self) -> usize {
        self.run_array.get_buffer_memory_size()
    }

    /// Array memory size of the wrapped run array.
    fn get_array_memory_size(&self) -> usize {
        self.run_array.get_array_memory_size()
    }

    /// Forwards the claim to the wrapped run array.
    #[cfg(feature = "pool")]
    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
        self.run_array.claim(pool);
    }
}
683
684impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
687where
688 R: RunEndIndexType,
689 V: Sync + Send,
690 &'a V: ArrayAccessor,
691 <&'a V as ArrayAccessor>::Item: Default,
692{
693 type Item = <&'a V as ArrayAccessor>::Item;
694
695 fn value(&self, logical_index: usize) -> Self::Item {
696 assert!(
697 logical_index < self.len(),
698 "Trying to access an element at index {} from a TypedRunArray of length {}",
699 logical_index,
700 self.len()
701 );
702 unsafe { self.value_unchecked(logical_index) }
703 }
704
705 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
706 let physical_index = self.run_array.get_physical_index(logical_index);
707 unsafe { self.values().value_unchecked(physical_index) }
708 }
709}
710
/// Iterating a [`TypedRunArray`] yields `Option`s of the values array's item
/// type, produced by [`RunArrayIter`].
impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
where
    R: RunEndIndexType,
    V: Sync + Send,
    &'a V: ArrayAccessor,
    <&'a V as ArrayAccessor>::Item: Default,
{
    type Item = Option<<&'a V as ArrayAccessor>::Item>;
    type IntoIter = RunArrayIter<'a, R, V>;

    /// Wraps this view in a [`RunArrayIter`].
    fn into_iter(self) -> Self::IntoIter {
        RunArrayIter::new(self)
    }
}
725
#[cfg(test)]
mod tests {
    use rand::Rng;
    use rand::rng;
    use rand::seq::SliceRandom;

    use super::*;
    use crate::builder::PrimitiveRunBuilder;
    use crate::cast::AsArray;
    use crate::new_empty_array;
    use crate::types::{Int8Type, UInt32Type};
    use crate::{Int16Array, Int32Array, StringArray};

    /// Builds a vector of exactly `size` optional integers made of randomly
    /// sized runs of values drawn from a shuffled seed pool.
    fn build_input_array(size: usize) -> Vec<Option<i32>> {
        // Seed pool: three nulls plus the values 1 through 9.
        let mut seed: Vec<Option<i32>> = vec![
            None,
            None,
            None,
            Some(1),
            Some(2),
            Some(3),
            Some(4),
            Some(5),
            Some(6),
            Some(7),
            Some(8),
            Some(9),
        ];
        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
        let mut ix = 0;
        let mut rng = rng();
        // Runs are at most 8 long, and shorter for small inputs (at least 1).
        let max_run_length = 8_usize.min(1_usize.max(size / 2));
        while result.len() < size {
            // Reshuffle each time the seed pool is about to be walked again.
            if ix == 0 {
                seed.shuffle(&mut rng);
            }
            let num = max_run_length.min(rng.random_range(1..=max_run_length));
            for _ in 0..num {
                result.push(seed[ix]);
            }
            ix += 1;
            if ix == seed.len() {
                ix = 0
            }
        }
        // The last run may overshoot `size`; trim back to the requested length.
        result.resize(size, None);
        result
    }

    /// Asserts that every logical index resolves to a physical slot holding the
    /// same value (or null) as the uncompressed `logical_array`.
    fn compare_logical_and_physical_indices(
        logical_indices: &[u32],
        logical_array: &[Option<i32>],
        physical_indices: &[usize],
        physical_array: &PrimitiveArray<Int32Type>,
    ) {
        assert_eq!(logical_indices.len(), physical_indices.len());

        logical_indices
            .iter()
            .map(|f| f.as_usize())
            .zip(physical_indices.iter())
            .for_each(|(logical_ix, physical_ix)| {
                let expected = logical_array[logical_ix];
                match expected {
                    Some(val) => {
                        assert!(physical_array.is_valid(*physical_ix));
                        let actual = physical_array.value(*physical_ix);
                        assert_eq!(val, actual);
                    }
                    None => {
                        assert!(physical_array.is_null(*physical_ix))
                    }
                };
            });
    }

    // Builds an array through `try_new` and checks that the length, null count,
    // values child and run ends come back unchanged.
    #[test]
    fn test_run_array() {
        let value_data =
            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);

        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
        let run_ends_data =
            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());

        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();

        // The logical length equals the last run end.
        assert_eq!(ree_array.len(), 22);
        assert_eq!(ree_array.null_count(), 0);

        let values = ree_array.values();
        assert_eq!(value_data.into_data(), values.to_data());
        assert_eq!(&DataType::Int8, values.data_type());

        let run_ends = ree_array.run_ends();
        assert_eq!(run_ends.values(), &run_ends_values);
    }

    // An array built from empty children, and a zero-length slice of it, must
    // both behave as empty.
    #[test]
    fn test_run_array_empty() {
        let runs = new_empty_array(&DataType::Int16);
        let runs = runs.as_primitive::<Int16Type>();
        let values = new_empty_array(&DataType::Int64);
        let array = RunArray::try_new(runs, &values).unwrap();

        fn assertions(array: &RunArray<Int16Type>) {
            assert!(array.is_empty());
            assert_eq!(array.get_start_physical_index(), 0);
            assert_eq!(array.get_end_physical_index(), 0);
            assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
            assert!(array.run_ends().is_empty());
            assert_eq!(array.run_ends().sliced_values().count(), 0);
        }

        assertions(&array);
        assertions(&array.slice(0, 0));
    }

    // Checks the `Debug` output for a three-run array and for a single run of
    // twenty equal values.
    // NOTE(review): the expected strings must match the values array's Debug
    // output byte-for-byte, including its indentation - confirm against the
    // formatter.
    #[test]
    fn test_run_array_fmt_debug() {
        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
        builder.append_value(12345678);
        builder.append_null();
        builder.append_value(22345678);
        let array = builder.finish();
        assert_eq!(
            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
        for _ in 0..20 {
            builder.append_value(1);
        }
        let array = builder.finish();

        assert_eq!(array.len(), 20);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 0);

        assert_eq!(
            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }

    // Exercises both `FromIterator` impls: optional strings (with "b" mapped to
    // null) and plain strings.
    // NOTE(review): expected Debug strings must match the formatter's
    // indentation exactly - confirm.
    #[test]
    fn test_run_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        let array: RunArray<Int16Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        // The run array has no null buffer of its own, but one logical null.
        assert_eq!(array.len(), 4);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 1);

        let array: RunArray<Int16Type> = test.into_iter().collect();
        assert_eq!(
            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }

    // With no adjacent repeats, every input element becomes its own run.
    #[test]
    fn test_run_array_run_ends_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: RunArray<Int16Type> = test.into_iter().collect();

        assert_eq!(array.len(), 4);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 0);

        let run_ends = array.run_ends();
        assert_eq!(&[1, 2, 3, 4], run_ends.values());
    }

    // Adjacent nulls collapse into a single run: six inputs yield five runs,
    // two of which are null.
    #[test]
    fn test_run_array_as_primitive_array_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: RunArray<Int32Type> = test.into_iter().collect();

        assert_eq!(array.len(), 6);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 3);

        let run_ends = array.run_ends();
        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());

        let values_data = array.values();
        assert_eq!(2, values_data.null_count());
        assert_eq!(5, values_data.len());
    }

    // An all-null input becomes one null run of length three.
    #[test]
    fn test_run_array_all_nulls() {
        let test = vec![None, None, None];
        let array: RunArray<Int32Type> = test.into_iter().collect();

        assert_eq!(array.len(), 3);
        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 3);

        // `run_ends.len()` is the logical length, while `values()` exposes the
        // single physical run end.
        let run_ends = array.run_ends();
        assert_eq!(3, run_ends.len());
        assert_eq!(&[3], run_ends.values());

        let values_data = array.values();
        assert_eq!(1, values_data.null_count());
    }

    // `try_new` with string values containing one null.
    // NOTE(review): expected Debug string must match the formatter's
    // indentation exactly - confirm.
    #[test]
    fn test_run_array_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();

        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 0);
        assert_eq!(array.logical_null_count(), 1);
        assert_eq!(array.len(), 4);
        assert_eq!(array.values().null_count(), 1);

        assert_eq!(
            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    // The `Int16RunArray` alias collects like `RunArray<Int16Type>`.
    #[test]
    fn test_run_array_int16_type_definition() {
        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
        assert_eq!(array.values(), &values);
    }

    // Empty strings are ordinary values and form their own run.
    #[test]
    fn test_run_array_empty_string() {
        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
        assert_eq!(array.values(), &values);
    }

    // `try_new` must reject children of different lengths.
    #[test]
    fn test_run_array_length_mismatch() {
        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    // `try_new` must reject run ends that contain nulls.
    #[test]
    fn test_run_array_run_ends_with_null() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError(
            "Found null values in run_ends array. The run_ends array should not have null values."
                .to_string(),
        );
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    // `try_new` must reject a run end of zero.
    #[test]
    fn test_run_array_run_ends_with_zeroes() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    // `try_new` must reject run ends that repeat a value.
    #[test]
    fn test_run_array_run_ends_non_increasing() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();

        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
    }

    // Converting Int32-run-end data into a `RunArray<Int64Type>` must panic.
    #[test]
    #[should_panic(expected = "Incorrect run ends type")]
    fn test_run_array_run_ends_data_type_mismatch() {
        let a = RunArray::<Int32Type>::from_iter(["32"]);
        let _ = RunArray::<Int64Type>::from(a.into_data());
    }

    // Reads every logical index through the typed accessor and compares it with
    // the uncompressed input.
    #[test]
    fn test_ree_array_accessor() {
        let input_array = build_input_array(256);

        let mut builder =
            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
        builder.extend(input_array.iter().copied());
        let run_array = builder.finish();
        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();

        for (i, inp_val) in input_array.iter().enumerate() {
            if let Some(val) = inp_val {
                let actual = typed.value(i);
                assert_eq!(*val, actual)
            } else {
                // For nulls, check the null bit of the physical slot instead of
                // reading a value.
                let physical_ix = run_array.get_physical_index(i);
                assert!(typed.values().is_null(physical_ix));
            };
        }
    }

    // Resolves shuffled, duplicated logical indices for a range of array sizes.
    // Ignored under Miri (presumably because of its runtime there - confirm).
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_get_physical_indices() {
        for logical_len in (0..250).step_by(10) {
            let input_array = build_input_array(logical_len);

            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
            builder.extend(input_array.clone().into_iter());

            let run_array = builder.finish();
            let physical_values_array = run_array.values().as_primitive::<Int32Type>();

            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
            // Include every index twice so repeated lookups are covered.
            logical_indices.append(&mut logical_indices.clone());
            let mut rng = rng();
            logical_indices.shuffle(&mut rng);

            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();

            assert_eq!(logical_indices.len(), physical_indices.len());

            compare_logical_and_physical_indices(
                &logical_indices,
                &input_array,
                &physical_indices,
                physical_values_array,
            );
        }
    }

    // Same check as above, on prefix and suffix slices of every length that
    // have been round-tripped through `ArrayData`.
    // Ignored under Miri (presumably because of its runtime there - confirm).
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_get_physical_indices_sliced() {
        let total_len = 80;
        let input_array = build_input_array(total_len);

        let mut builder =
            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
        builder.extend(input_array.iter().copied());
        let run_array = builder.finish();
        let physical_values_array = run_array.values().as_primitive::<Int32Type>();

        for slice_len in 1..=total_len {
            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
            // Include every index twice so repeated lookups are covered.
            logical_indices.append(&mut logical_indices.clone());
            let mut rng = rng();
            logical_indices.shuffle(&mut rng);

            // Prefix slice: the first `slice_len` logical elements.
            let sliced_input_array = &input_array[0..slice_len];

            let sliced_run_array: RunArray<Int16Type> =
                run_array.slice(0, slice_len).into_data().into();

            let physical_indices = sliced_run_array
                .get_physical_indices(&logical_indices)
                .unwrap();

            compare_logical_and_physical_indices(
                &logical_indices,
                sliced_input_array,
                &physical_indices,
                physical_values_array,
            );

            // Suffix slice: the last `slice_len` logical elements.
            let sliced_input_array = &input_array[total_len - slice_len..total_len];

            let sliced_run_array: RunArray<Int16Type> = run_array
                .slice(total_len - slice_len, slice_len)
                .into_data()
                .into();

            let physical_indices = sliced_run_array
                .get_physical_indices(&logical_indices)
                .unwrap();

            compare_logical_and_physical_indices(
                &logical_indices,
                sliced_input_array,
                &physical_indices,
                physical_values_array,
            );
        }
    }

    // `logical_nulls` must expand value nulls per logical element, including
    // for slices with various offsets and lengths.
    #[test]
    fn test_logical_nulls() {
        let run = Int32Array::from(vec![3, 6, 9, 12]);
        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
        let array = RunArray::try_new(&run, &values).unwrap();

        // Expected validity per logical element (`true` = valid): four runs of
        // three, alternating valid and null.
        let expected = [
            true, true, true, false, false, false, true, true, true, false, false, false,
        ];

        let n = array.logical_nulls().unwrap();
        assert_eq!(n.null_count(), 6);

        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
        for (offset, length) in slices {
            let a = array.slice(offset, length);
            let n = a.logical_nulls().unwrap();
            let n = n.into_iter().collect::<Vec<_>>();
            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
        }
    }

    // Two arrays built from equal run ends and values compare equal.
    #[test]
    fn test_run_array_eq_identical() {
        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();

        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
        let values2 = StringArray::from(vec!["a", "b", "c"]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();

        assert_eq!(array1, array2);
    }

    // Same values but different run ends compare unequal.
    #[test]
    fn test_run_array_ne_different_run_ends() {
        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();

        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
        let values2 = StringArray::from(vec!["a", "b", "c"]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();

        assert_ne!(array1, array2);
    }

    // Same run ends but different values compare unequal.
    #[test]
    fn test_run_array_ne_different_values() {
        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();

        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
        let values2 = StringArray::from(vec!["a", "b", "d"]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();

        assert_ne!(array1, array2);
    }

    // Equality also holds when both arrays have a null run in the same place.
    #[test]
    fn test_run_array_eq_with_nulls() {
        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
        let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();

        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
        let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();

        assert_eq!(array1, array2);
    }

    // Equality for arrays using Int16 run ends (both operands are Int16 here).
    #[test]
    fn test_run_array_eq_different_run_end_types() {
        let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
        let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
        let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();

        let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
        let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
        let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();

        assert_eq!(array_i16_1, array_i16_2);
    }

    // `values_slice` returns only the runs touched by a slice.
    #[test]
    fn test_run_array_values_slice() {
        let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
        let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

        // Logical 1..5 touches the first two runs.
        let slice = array.slice(1, 4);
        assert_eq!(slice.get_start_physical_index(), 0);
        assert_eq!(slice.get_end_physical_index(), 1);

        let values_slice = slice.values_slice();
        let values_slice = values_slice.as_primitive::<Int32Type>();
        assert_eq!(values_slice.values(), &[0, 1]);

        // Logical 2..5 lies entirely inside the second run.
        let slice2 = array.slice(2, 3);
        assert_eq!(slice2.get_start_physical_index(), 1);
        assert_eq!(slice2.get_end_physical_index(), 1);

        let values_slice2 = slice2.values_slice();
        let values_slice2 = values_slice2.as_primitive::<Int32Type>();
        assert_eq!(values_slice2.values(), &[1]);
    }

    // `values_slice` of an empty slice is empty but keeps the values data type.
    #[test]
    fn test_run_array_values_slice_empty() {
        let run_ends = Int32Array::from(vec![2, 5, 10]);
        let values = StringArray::from(vec!["a", "b", "c"]);
        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

        let slice = array.slice(0, 0);
        assert_eq!(slice.len(), 0);

        let values_slice = slice.values_slice();
        assert_eq!(values_slice.len(), 0);
        assert_eq!(values_slice.data_type(), &DataType::Utf8);
    }

    // Zero-length slices are equal to each other regardless of offset, and
    // equal to a freshly created empty array of the same type.
    #[test]
    fn test_run_array_eq_empty() {
        let run_ends = Int32Array::from(vec![2, 5, 10]);
        let values = StringArray::from(vec!["a", "b", "c"]);
        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

        let slice1 = array.slice(0, 0);
        let slice2 = array.slice(1, 0);
        let slice3 = array.slice(10, 0);

        assert_eq!(slice1, slice2);
        assert_eq!(slice2, slice3);

        let empty_array = new_empty_array(array.data_type());
        let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());

        assert_eq!(&slice1, empty_array);
    }

    // Different physical encodings of the same logical sequence compare equal.
    #[test]
    fn test_run_array_eq_diff_physical_same_logical() {
        let run_ends1 = Int32Array::from(vec![1, 3, 6]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();

        let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
        let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();

        assert_eq!(array1, array2);
    }

    // A slice compares equal to an unsliced array with the same logical
    // content.
    #[test]
    fn test_run_array_eq_sliced() {
        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
        // Logical 1..7 of array1 is: a, b, b, b, c, c.
        let slice1 = array1.slice(1, 6);
        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
        let values2 = StringArray::from(vec!["a", "b", "c"]);
        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
        assert_eq!(slice1, array2);

        // Logical 2..5 of array1 is: b, b, b.
        let slice2 = array1.slice(2, 3);
        let run_ends3 = Int32Array::from(vec![3]);
        let values3 = StringArray::from(vec!["b"]);
        let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
        assert_eq!(slice2, array3);
    }

    // Equal slices compare equal; slices of the same length taken at different
    // offsets with different content compare unequal.
    #[test]
    fn test_run_array_eq_sliced_different_offsets() {
        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
        let values1 = StringArray::from(vec!["a", "b", "c"]);
        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
        let array2 = array1.clone();
        assert_eq!(array1, array2);

        // Both are logical 1..5: a, b, b, b.
        let slice1 = array1.slice(1, 4);
        let slice2 = array1.slice(1, 4);
        assert_eq!(slice1, slice2);

        // Logical 0..4: a, a, b, b.
        let slice3 = array1.slice(0, 4);
        assert_ne!(slice1, slice3);
    }

    // Without `force_validate`, `new_unchecked` accepts a non-run-end-encoded
    // data type without panicking.
    #[test]
    #[cfg(not(feature = "force_validate"))]
    fn allow_to_create_invalid_array_using_new_unchecked() {
        let valid = RunArray::<Int32Type>::from_iter(["32"]);
        let (_, buffer, values) = valid.into_parts();

        let _ = unsafe {
            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
        };
    }

    // With `force_validate`, the same call must panic on the data type check.
    #[test]
    #[should_panic(
        expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
    )]
    #[cfg(feature = "force_validate")]
    fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
    {
        let valid = RunArray::<Int32Type>::from_iter(["32"]);
        let (_, buffer, values) = valid.into_parts();

        let _ = unsafe {
            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
        };
    }

    // `into_parts` followed by `new_unchecked` reproduces an equal array.
    #[test]
    fn test_run_array_roundtrip() {
        let run = Int32Array::from(vec![3, 6, 9, 12]);
        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
        let array = RunArray::try_new(&run, &values).unwrap();

        let (dt, buffer, values) = array.clone().into_parts();
        let created_from_parts =
            unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
        assert_eq!(array, created_from_parts);
    }
}