1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33#[macro_export]
44macro_rules! ree_map {
45 ($array:expr, $run_type:ty, $func:expr) => {{
46 let ree = $array.as_run_opt::<$run_type>().unwrap();
47 let inner_values = $func(ree.values().as_ref())?;
48 Ok(std::sync::Arc::new(ree.with_values(inner_values)))
49 }};
50}
51
52pub struct RunArray<R: RunEndIndexType> {
85 data_type: DataType,
86 run_ends: RunEndBuffer<R::Native>,
87 values: ArrayRef,
88}
89
90impl<R: RunEndIndexType> Clone for RunArray<R> {
91 fn clone(&self) -> Self {
92 Self {
93 data_type: self.data_type.clone(),
94 run_ends: self.run_ends.clone(),
95 values: self.values.clone(),
96 }
97 }
98}
99
100impl<R: RunEndIndexType> RunArray<R> {
101 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
104 let len = run_ends.len();
105 if len == 0 {
106 return 0;
107 }
108 run_ends.value(len - 1).as_usize()
109 }
110
111 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
119 let run_ends_type = run_ends.data_type().clone();
120 let values_type = values.data_type().clone();
121 let ree_array_type = DataType::RunEndEncoded(
122 Arc::new(Field::new("run_ends", run_ends_type, false)),
123 Arc::new(Field::new("values", values_type, true)),
124 );
125 let len = RunArray::logical_len(run_ends);
126 let builder = ArrayDataBuilder::new(ree_array_type)
127 .len(len)
128 .add_child_data(run_ends.to_data())
129 .add_child_data(values.to_data());
130
131 let array_data = unsafe { builder.build_unchecked() };
133
134 array_data.validate_data()?;
141
142 Ok(array_data.into())
143 }
144
145 pub unsafe fn new_unchecked(
151 data_type: DataType,
152 run_ends: RunEndBuffer<R::Native>,
153 values: ArrayRef,
154 ) -> Self {
155 if cfg!(feature = "force_validate") {
156 match &data_type {
157 DataType::RunEndEncoded(run_ends, values_field) => {
158 assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
159 assert_eq!(
160 run_ends.data_type(),
161 &R::DATA_TYPE,
162 "Incorrect run ends type"
163 );
164 assert_eq!(
165 values_field.data_type(),
166 values.data_type(),
167 "Incorrect values type"
168 );
169 }
170 _ => {
171 panic!(
172 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
173 );
174 }
175 }
176
177 let run_array = Self {
178 data_type,
179 run_ends,
180 values,
181 };
182
183 run_array
190 .to_data()
191 .validate_data()
192 .expect("RunArray data should be valid");
193
194 return run_array;
195 }
196
197 Self {
198 data_type,
199 run_ends,
200 values,
201 }
202 }
203
204 pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
206 (self.data_type, self.run_ends, self.values)
207 }
208
209 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
211 &self.run_ends
212 }
213
214 pub fn values(&self) -> &ArrayRef {
219 &self.values
220 }
221
222 pub fn with_values(&self, values: ArrayRef) -> Self {
248 assert_eq!(values.len(), self.values().len());
249 let (run_ends_field, values_field) = match &self.data_type {
250 DataType::RunEndEncoded(r, v) => (r, v),
251 _ => unreachable!("RunArray should have type RunEndEncoded"),
252 };
253 let data_type =
254 DataType::RunEndEncoded(Arc::clone(run_ends_field), Arc::clone(values_field));
255 Self {
256 data_type,
257 run_ends: self.run_ends.clone(),
258 values,
259 }
260 }
261
262 pub fn values_slice(&self) -> ArrayRef {
267 if self.is_empty() {
268 return self.values.slice(0, 0);
269 }
270 let start = self.get_start_physical_index();
271 let end = self.get_end_physical_index();
272 self.values.slice(start, end - start + 1)
273 }
274
275 pub fn get_start_physical_index(&self) -> usize {
279 self.run_ends.get_start_physical_index()
280 }
281
282 pub fn get_end_physical_index(&self) -> usize {
286 self.run_ends.get_end_physical_index()
287 }
288
289 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
302 let values = self.values.as_any().downcast_ref()?;
303 Some(TypedRunArray {
304 run_array: self,
305 values,
306 })
307 }
308
309 pub fn get_physical_index(&self, logical_index: usize) -> usize {
313 self.run_ends.get_physical_index(logical_index)
314 }
315
316 #[inline]
320 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
321 where
322 I: ArrowNativeType,
323 {
324 self.run_ends()
325 .get_physical_indices(logical_indices)
326 .map_err(|index| {
327 ArrowError::InvalidArgumentError(format!(
328 "Logical index {} is out of bounds for RunArray of length {}",
329 index.as_usize(),
330 self.len()
331 ))
332 })
333 }
334
335 pub fn slice(&self, offset: usize, length: usize) -> Self {
341 Self {
342 data_type: self.data_type.clone(),
343 run_ends: self.run_ends.slice(offset, length),
344 values: self.values.clone(),
345 }
346 }
347}
348
349impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
350 fn from(data: ArrayData) -> Self {
352 let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
353
354 match &data_type {
355 DataType::RunEndEncoded(_, _) => {}
356 _ => {
357 panic!(
358 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
359 );
360 }
361 }
362
363 let [run_end_child, values_child]: [ArrayData; 2] = child_data
364 .try_into()
365 .expect("RunArray data should have exactly two child arrays");
366
367 let (
369 run_end_data_type,
370 _run_end_len,
371 _run_end_nulls,
372 _run_end_offset,
373 run_end_buffers,
374 _run_end_child_data,
375 ) = run_end_child.into_parts();
376 assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
377 let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
378 .try_into()
379 .expect("Run ends should have exactly one buffer");
380 let scalar = ScalarBuffer::from(run_end_buffer);
381 let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
382
383 let values = make_array(values_child);
384
385 Self {
386 data_type,
387 run_ends,
388 values,
389 }
390 }
391}
392
393impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
394 fn from(array: RunArray<R>) -> Self {
395 let len = array.run_ends.len();
396 let offset = array.run_ends.offset();
397
398 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
399 .len(array.run_ends.values().len())
400 .buffers(vec![array.run_ends.into_inner().into_inner()]);
401
402 let run_ends = unsafe { run_ends.build_unchecked() };
403
404 let builder = ArrayDataBuilder::new(array.data_type)
405 .len(len)
406 .offset(offset)
407 .child_data(vec![run_ends, array.values.to_data()]);
408
409 unsafe { builder.build_unchecked() }
410 }
411}
412
413unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
415 fn as_any(&self) -> &dyn Any {
416 self
417 }
418
419 fn to_data(&self) -> ArrayData {
420 self.clone().into()
421 }
422
423 fn into_data(self) -> ArrayData {
424 self.into()
425 }
426
427 fn data_type(&self) -> &DataType {
428 &self.data_type
429 }
430
431 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
432 Arc::new(self.slice(offset, length))
433 }
434
435 fn len(&self) -> usize {
436 self.run_ends.len()
437 }
438
439 fn is_empty(&self) -> bool {
440 self.run_ends.is_empty()
441 }
442
443 fn shrink_to_fit(&mut self) {
444 self.run_ends.shrink_to_fit();
445 self.values.shrink_to_fit();
446 }
447
448 fn offset(&self) -> usize {
449 self.run_ends.offset()
450 }
451
452 fn nulls(&self) -> Option<&NullBuffer> {
453 None
454 }
455
456 fn logical_nulls(&self) -> Option<NullBuffer> {
457 let len = self.len();
458 let nulls = self.values.logical_nulls()?;
459 let mut out = BooleanBufferBuilder::new(len);
460 let offset = self.run_ends.offset();
461 let mut valid_start = 0;
462 let mut last_end = 0;
463 for (idx, end) in self.run_ends.values().iter().enumerate() {
464 let end = end.as_usize();
465 if end < offset {
466 continue;
467 }
468 let end = (end - offset).min(len);
469 if nulls.is_null(idx) {
470 if valid_start < last_end {
471 out.append_n(last_end - valid_start, true);
472 }
473 out.append_n(end - last_end, false);
474 valid_start = end;
475 }
476 last_end = end;
477 if end == len {
478 break;
479 }
480 }
481 if valid_start < len {
482 out.append_n(len - valid_start, true)
483 }
484 assert_eq!(out.len(), len);
486 Some(out.finish().into())
487 }
488
489 fn is_nullable(&self) -> bool {
490 !self.is_empty() && self.values.is_nullable()
491 }
492
493 fn get_buffer_memory_size(&self) -> usize {
494 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
495 }
496
497 fn get_array_memory_size(&self) -> usize {
498 std::mem::size_of::<Self>()
499 + self.run_ends.inner().inner().capacity()
500 + self.values.get_array_memory_size()
501 }
502
503 #[cfg(feature = "pool")]
504 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
505 self.run_ends.claim(pool);
506 self.values.claim(pool);
507 }
508}
509
510impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
511 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
512 writeln!(
513 f,
514 "RunArray {{run_ends: {:?}, values: {:?}}}",
515 self.run_ends.values(),
516 self.values
517 )
518 }
519}
520
521impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
538 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
539 let it = iter.into_iter();
540 let (lower, _) = it.size_hint();
541 let mut builder = StringRunBuilder::with_capacity(lower, 256);
542 it.for_each(|i| {
543 builder.append_option(i);
544 });
545
546 builder.finish()
547 }
548}
549
550impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
565 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
566 let it = iter.into_iter();
567 let (lower, _) = it.size_hint();
568 let mut builder = StringRunBuilder::with_capacity(lower, 256);
569 it.for_each(|i| {
570 builder.append_value(i);
571 });
572
573 builder.finish()
574 }
575}
576
577pub type Int16RunArray = RunArray<Int16Type>;
591
592pub type Int32RunArray = RunArray<Int32Type>;
606
607pub type Int64RunArray = RunArray<Int64Type>;
621
622pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
640 run_array: &'a RunArray<R>,
642
643 values: &'a V,
645}
646
647impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
649 fn clone(&self) -> Self {
650 *self
651 }
652}
653
654impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
655
656impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
657 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
658 writeln!(f, "TypedRunArray({:?})", self.run_array)
659 }
660}
661
662impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
663 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
665 self.run_array.run_ends()
666 }
667
668 pub fn values(&self) -> &'a V {
670 self.values
671 }
672
673 pub fn run_array(&self) -> &'a RunArray<R> {
675 self.run_array
676 }
677}
678
679unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
681 fn as_any(&self) -> &dyn Any {
682 self.run_array
683 }
684
685 fn to_data(&self) -> ArrayData {
686 self.run_array.to_data()
687 }
688
689 fn into_data(self) -> ArrayData {
690 self.run_array.into_data()
691 }
692
693 fn data_type(&self) -> &DataType {
694 self.run_array.data_type()
695 }
696
697 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
698 Arc::new(self.run_array.slice(offset, length))
699 }
700
701 fn len(&self) -> usize {
702 self.run_array.len()
703 }
704
705 fn is_empty(&self) -> bool {
706 self.run_array.is_empty()
707 }
708
709 fn offset(&self) -> usize {
710 self.run_array.offset()
711 }
712
713 fn nulls(&self) -> Option<&NullBuffer> {
714 self.run_array.nulls()
715 }
716
717 fn logical_nulls(&self) -> Option<NullBuffer> {
718 self.run_array.logical_nulls()
719 }
720
721 fn logical_null_count(&self) -> usize {
722 self.run_array.logical_null_count()
723 }
724
725 fn is_nullable(&self) -> bool {
726 self.run_array.is_nullable()
727 }
728
729 fn get_buffer_memory_size(&self) -> usize {
730 self.run_array.get_buffer_memory_size()
731 }
732
733 fn get_array_memory_size(&self) -> usize {
734 self.run_array.get_array_memory_size()
735 }
736
737 #[cfg(feature = "pool")]
738 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
739 self.run_array.claim(pool);
740 }
741}
742
743impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
746where
747 R: RunEndIndexType,
748 V: Sync + Send,
749 &'a V: ArrayAccessor,
750 <&'a V as ArrayAccessor>::Item: Default,
751{
752 type Item = <&'a V as ArrayAccessor>::Item;
753
754 fn value(&self, logical_index: usize) -> Self::Item {
755 assert!(
756 logical_index < self.len(),
757 "Trying to access an element at index {} from a TypedRunArray of length {}",
758 logical_index,
759 self.len()
760 );
761 unsafe { self.value_unchecked(logical_index) }
762 }
763
764 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
765 let physical_index = self.run_array.get_physical_index(logical_index);
766 unsafe { self.values().value_unchecked(physical_index) }
767 }
768}
769
770impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
771where
772 R: RunEndIndexType,
773 V: Sync + Send,
774 &'a V: ArrayAccessor,
775 <&'a V as ArrayAccessor>::Item: Default,
776{
777 type Item = Option<<&'a V as ArrayAccessor>::Item>;
778 type IntoIter = RunArrayIter<'a, R, V>;
779
780 fn into_iter(self) -> Self::IntoIter {
781 RunArrayIter::new(self)
782 }
783}
784
785#[cfg(test)]
786mod tests {
787 use rand::Rng;
788 use rand::rng;
789 use rand::seq::SliceRandom;
790
791 use super::*;
792 use crate::builder::PrimitiveRunBuilder;
793 use crate::cast::AsArray;
794 use crate::new_empty_array;
795 use crate::types::{Int8Type, UInt32Type};
796 use crate::{Int16Array, Int32Array, StringArray};
797
798 fn build_input_array(size: usize) -> Vec<Option<i32>> {
799 let mut seed: Vec<Option<i32>> = vec![
802 None,
803 None,
804 None,
805 Some(1),
806 Some(2),
807 Some(3),
808 Some(4),
809 Some(5),
810 Some(6),
811 Some(7),
812 Some(8),
813 Some(9),
814 ];
815 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
816 let mut ix = 0;
817 let mut rng = rng();
818 let max_run_length = 8_usize.min(1_usize.max(size / 2));
820 while result.len() < size {
821 if ix == 0 {
823 seed.shuffle(&mut rng);
824 }
825 let num = max_run_length.min(rng.random_range(1..=max_run_length));
827 for _ in 0..num {
828 result.push(seed[ix]);
829 }
830 ix += 1;
831 if ix == seed.len() {
832 ix = 0
833 }
834 }
835 result.resize(size, None);
836 result
837 }
838
839 fn compare_logical_and_physical_indices(
841 logical_indices: &[u32],
842 logical_array: &[Option<i32>],
843 physical_indices: &[usize],
844 physical_array: &PrimitiveArray<Int32Type>,
845 ) {
846 assert_eq!(logical_indices.len(), physical_indices.len());
847
848 logical_indices
850 .iter()
851 .map(|f| f.as_usize())
852 .zip(physical_indices.iter())
853 .for_each(|(logical_ix, physical_ix)| {
854 let expected = logical_array[logical_ix];
855 match expected {
856 Some(val) => {
857 assert!(physical_array.is_valid(*physical_ix));
858 let actual = physical_array.value(*physical_ix);
859 assert_eq!(val, actual);
860 }
861 None => {
862 assert!(physical_array.is_null(*physical_ix))
863 }
864 };
865 });
866 }
867 #[test]
868 fn test_run_array() {
869 let value_data =
871 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
872
873 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
875 let run_ends_data =
876 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
877
878 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
880
881 assert_eq!(ree_array.len(), 22);
882 assert_eq!(ree_array.null_count(), 0);
883
884 let values = ree_array.values();
885 assert_eq!(value_data.into_data(), values.to_data());
886 assert_eq!(&DataType::Int8, values.data_type());
887
888 let run_ends = ree_array.run_ends();
889 assert_eq!(run_ends.values(), &run_ends_values);
890 }
891
892 #[test]
893 fn test_run_array_empty() {
894 let runs = new_empty_array(&DataType::Int16);
895 let runs = runs.as_primitive::<Int16Type>();
896 let values = new_empty_array(&DataType::Int64);
897 let array = RunArray::try_new(runs, &values).unwrap();
898
899 fn assertions(array: &RunArray<Int16Type>) {
900 assert!(array.is_empty());
901 assert_eq!(array.get_start_physical_index(), 0);
902 assert_eq!(array.get_end_physical_index(), 0);
903 assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
904 assert!(array.run_ends().is_empty());
905 assert_eq!(array.run_ends().sliced_values().count(), 0);
906 }
907
908 assertions(&array);
909 assertions(&array.slice(0, 0));
910 }
911
912 #[test]
913 fn test_run_array_fmt_debug() {
914 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
915 builder.append_value(12345678);
916 builder.append_null();
917 builder.append_value(22345678);
918 let array = builder.finish();
919 assert_eq!(
920 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
921 format!("{array:?}")
922 );
923
924 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
925 for _ in 0..20 {
926 builder.append_value(1);
927 }
928 let array = builder.finish();
929
930 assert_eq!(array.len(), 20);
931 assert_eq!(array.null_count(), 0);
932 assert_eq!(array.logical_null_count(), 0);
933
934 assert_eq!(
935 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
936 format!("{array:?}")
937 );
938 }
939
940 #[test]
941 fn test_run_array_from_iter() {
942 let test = vec!["a", "a", "b", "c"];
943 let array: RunArray<Int16Type> = test
944 .iter()
945 .map(|&x| if x == "b" { None } else { Some(x) })
946 .collect();
947 assert_eq!(
948 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
949 format!("{array:?}")
950 );
951
952 assert_eq!(array.len(), 4);
953 assert_eq!(array.null_count(), 0);
954 assert_eq!(array.logical_null_count(), 1);
955
956 let array: RunArray<Int16Type> = test.into_iter().collect();
957 assert_eq!(
958 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
959 format!("{array:?}")
960 );
961 }
962
963 #[test]
964 fn test_run_array_run_ends_as_primitive_array() {
965 let test = vec!["a", "b", "c", "a"];
966 let array: RunArray<Int16Type> = test.into_iter().collect();
967
968 assert_eq!(array.len(), 4);
969 assert_eq!(array.null_count(), 0);
970 assert_eq!(array.logical_null_count(), 0);
971
972 let run_ends = array.run_ends();
973 assert_eq!(&[1, 2, 3, 4], run_ends.values());
974 }
975
976 #[test]
977 fn test_run_array_as_primitive_array_with_null() {
978 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
979 let array: RunArray<Int32Type> = test.into_iter().collect();
980
981 assert_eq!(array.len(), 6);
982 assert_eq!(array.null_count(), 0);
983 assert_eq!(array.logical_null_count(), 3);
984
985 let run_ends = array.run_ends();
986 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
987
988 let values_data = array.values();
989 assert_eq!(2, values_data.null_count());
990 assert_eq!(5, values_data.len());
991 }
992
993 #[test]
994 fn test_run_array_all_nulls() {
995 let test = vec![None, None, None];
996 let array: RunArray<Int32Type> = test.into_iter().collect();
997
998 assert_eq!(array.len(), 3);
999 assert_eq!(array.null_count(), 0);
1000 assert_eq!(array.logical_null_count(), 3);
1001
1002 let run_ends = array.run_ends();
1003 assert_eq!(3, run_ends.len());
1004 assert_eq!(&[3], run_ends.values());
1005
1006 let values_data = array.values();
1007 assert_eq!(1, values_data.null_count());
1008 }
1009
1010 #[test]
1011 fn test_run_array_try_new() {
1012 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1013 .into_iter()
1014 .collect();
1015 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
1016
1017 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1018 assert_eq!(array.values().data_type(), &DataType::Utf8);
1019
1020 assert_eq!(array.null_count(), 0);
1021 assert_eq!(array.logical_null_count(), 1);
1022 assert_eq!(array.len(), 4);
1023 assert_eq!(array.values().null_count(), 1);
1024
1025 assert_eq!(
1026 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
1027 format!("{array:?}")
1028 );
1029 }
1030
1031 #[test]
1032 fn test_run_array_int16_type_definition() {
1033 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
1034 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1035 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
1036 assert_eq!(array.values(), &values);
1037 }
1038
1039 #[test]
1040 fn test_run_array_empty_string() {
1041 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
1042 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
1043 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
1044 assert_eq!(array.values(), &values);
1045 }
1046
1047 #[test]
1048 fn test_run_array_length_mismatch() {
1049 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1050 .into_iter()
1051 .collect();
1052 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1053
1054 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1055 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
1056 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1057 }
1058
1059 #[test]
1060 fn test_run_array_run_ends_with_null() {
1061 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1062 .into_iter()
1063 .collect();
1064 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
1065
1066 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1067 let expected = ArrowError::InvalidArgumentError(
1068 "Found null values in run_ends array. The run_ends array should not have null values."
1069 .to_string(),
1070 );
1071 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1072 }
1073
1074 #[test]
1075 fn test_run_array_run_ends_with_zeroes() {
1076 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1077 .into_iter()
1078 .collect();
1079 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
1080
1081 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1082 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
1083 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1084 }
1085
1086 #[test]
1087 fn test_run_array_run_ends_non_increasing() {
1088 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1089 .into_iter()
1090 .collect();
1091 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
1092
1093 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1094 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
1095 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1096 }
1097
1098 #[test]
1099 #[should_panic(expected = "Incorrect run ends type")]
1100 fn test_run_array_run_ends_data_type_mismatch() {
1101 let a = RunArray::<Int32Type>::from_iter(["32"]);
1102 let _ = RunArray::<Int64Type>::from(a.into_data());
1103 }
1104
1105 #[test]
1106 fn test_ree_array_accessor() {
1107 let input_array = build_input_array(256);
1108
1109 let mut builder =
1111 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1112 builder.extend(input_array.iter().copied());
1113 let run_array = builder.finish();
1114 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
1115
1116 for (i, inp_val) in input_array.iter().enumerate() {
1118 if let Some(val) = inp_val {
1119 let actual = typed.value(i);
1120 assert_eq!(*val, actual)
1121 } else {
1122 let physical_ix = run_array.get_physical_index(i);
1123 assert!(typed.values().is_null(physical_ix));
1124 };
1125 }
1126 }
1127
1128 #[test]
1129 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
1131 for logical_len in (0..250).step_by(10) {
1133 let input_array = build_input_array(logical_len);
1134
1135 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1137 builder.extend(input_array.clone().into_iter());
1138
1139 let run_array = builder.finish();
1140 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1141
1142 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1144 logical_indices.append(&mut logical_indices.clone());
1146 let mut rng = rng();
1147 logical_indices.shuffle(&mut rng);
1148
1149 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1150
1151 assert_eq!(logical_indices.len(), physical_indices.len());
1152
1153 compare_logical_and_physical_indices(
1155 &logical_indices,
1156 &input_array,
1157 &physical_indices,
1158 physical_values_array,
1159 );
1160 }
1161 }
1162
1163 #[test]
1164 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
1166 let total_len = 80;
1167 let input_array = build_input_array(total_len);
1168
1169 let mut builder =
1171 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1172 builder.extend(input_array.iter().copied());
1173 let run_array = builder.finish();
1174 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1175
1176 for slice_len in 1..=total_len {
1178 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1180 logical_indices.append(&mut logical_indices.clone());
1182 let mut rng = rng();
1183 logical_indices.shuffle(&mut rng);
1184
1185 let sliced_input_array = &input_array[0..slice_len];
1188
1189 let sliced_run_array: RunArray<Int16Type> =
1191 run_array.slice(0, slice_len).into_data().into();
1192
1193 let physical_indices = sliced_run_array
1195 .get_physical_indices(&logical_indices)
1196 .unwrap();
1197
1198 compare_logical_and_physical_indices(
1199 &logical_indices,
1200 sliced_input_array,
1201 &physical_indices,
1202 physical_values_array,
1203 );
1204
1205 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1208
1209 let sliced_run_array: RunArray<Int16Type> = run_array
1211 .slice(total_len - slice_len, slice_len)
1212 .into_data()
1213 .into();
1214
1215 let physical_indices = sliced_run_array
1217 .get_physical_indices(&logical_indices)
1218 .unwrap();
1219
1220 compare_logical_and_physical_indices(
1221 &logical_indices,
1222 sliced_input_array,
1223 &physical_indices,
1224 physical_values_array,
1225 );
1226 }
1227 }
1228
1229 #[test]
1230 fn test_logical_nulls() {
1231 let run = Int32Array::from(vec![3, 6, 9, 12]);
1232 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1233 let array = RunArray::try_new(&run, &values).unwrap();
1234
1235 let expected = [
1236 true, true, true, false, false, false, true, true, true, false, false, false,
1237 ];
1238
1239 let n = array.logical_nulls().unwrap();
1240 assert_eq!(n.null_count(), 6);
1241
1242 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1243 for (offset, length) in slices {
1244 let a = array.slice(offset, length);
1245 let n = a.logical_nulls().unwrap();
1246 let n = n.into_iter().collect::<Vec<_>>();
1247 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1248 }
1249 }
1250
1251 #[test]
1252 fn test_run_array_eq_identical() {
1253 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1254 let values1 = StringArray::from(vec!["a", "b", "c"]);
1255 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1256
1257 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1258 let values2 = StringArray::from(vec!["a", "b", "c"]);
1259 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1260
1261 assert_eq!(array1, array2);
1262 }
1263
1264 #[test]
1265 fn test_run_array_ne_different_run_ends() {
1266 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1267 let values1 = StringArray::from(vec!["a", "b", "c"]);
1268 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1269
1270 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1271 let values2 = StringArray::from(vec!["a", "b", "c"]);
1272 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1273
1274 assert_ne!(array1, array2);
1275 }
1276
1277 #[test]
1278 fn test_run_array_ne_different_values() {
1279 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1280 let values1 = StringArray::from(vec!["a", "b", "c"]);
1281 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1282
1283 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1284 let values2 = StringArray::from(vec!["a", "b", "d"]);
1285 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1286
1287 assert_ne!(array1, array2);
1288 }
1289
1290 #[test]
1291 fn test_run_array_eq_with_nulls() {
1292 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1293 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1294 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1295
1296 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1297 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1298 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1299
1300 assert_eq!(array1, array2);
1301 }
1302
1303 #[test]
1304 fn test_run_array_eq_different_run_end_types() {
1305 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1306 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1307 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1308
1309 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1310 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1311 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1312
1313 assert_eq!(array_i16_1, array_i16_2);
1314 }
1315
1316 #[test]
1317 fn test_run_array_values_slice() {
1318 let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1320 let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1321 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1322
1323 let slice = array.slice(1, 4); assert_eq!(slice.get_start_physical_index(), 0);
1330 assert_eq!(slice.get_end_physical_index(), 1);
1331
1332 let values_slice = slice.values_slice();
1333 let values_slice = values_slice.as_primitive::<Int32Type>();
1334 assert_eq!(values_slice.values(), &[0, 1]);
1335
1336 let slice2 = array.slice(2, 3); assert_eq!(slice2.get_start_physical_index(), 1);
1340 assert_eq!(slice2.get_end_physical_index(), 1);
1341
1342 let values_slice2 = slice2.values_slice();
1343 let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1344 assert_eq!(values_slice2.values(), &[1]);
1345 }
1346
1347 #[test]
1348 fn test_run_array_values_slice_empty() {
1349 let run_ends = Int32Array::from(vec![2, 5, 10]);
1350 let values = StringArray::from(vec!["a", "b", "c"]);
1351 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1352
1353 let slice = array.slice(0, 0);
1354 assert_eq!(slice.len(), 0);
1355
1356 let values_slice = slice.values_slice();
1357 assert_eq!(values_slice.len(), 0);
1358 assert_eq!(values_slice.data_type(), &DataType::Utf8);
1359 }
1360
1361 #[test]
1362 fn test_run_array_eq_empty() {
1363 let run_ends = Int32Array::from(vec![2, 5, 10]);
1364 let values = StringArray::from(vec!["a", "b", "c"]);
1365 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1366
1367 let slice1 = array.slice(0, 0);
1368 let slice2 = array.slice(1, 0);
1369 let slice3 = array.slice(10, 0);
1370
1371 assert_eq!(slice1, slice2);
1372 assert_eq!(slice2, slice3);
1373
1374 let empty_array = new_empty_array(array.data_type());
1375 let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1376
1377 assert_eq!(&slice1, empty_array);
1378 }
1379
1380 #[test]
1381 fn test_run_array_eq_diff_physical_same_logical() {
1382 let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1383 let values1 = StringArray::from(vec!["a", "b", "c"]);
1384 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1385
1386 let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1387 let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1388 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1389
1390 assert_eq!(array1, array2);
1391 }
1392
1393 #[test]
1394 fn test_run_array_eq_sliced() {
1395 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1396 let values1 = StringArray::from(vec!["a", "b", "c"]);
1397 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1398 let slice1 = array1.slice(1, 6);
1401 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1404 let values2 = StringArray::from(vec!["a", "b", "c"]);
1405 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1406 assert_eq!(slice1, array2);
1409
1410 let slice2 = array1.slice(2, 3);
1411 let run_ends3 = Int32Array::from(vec![3]);
1413 let values3 = StringArray::from(vec!["b"]);
1414 let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1415 assert_eq!(slice2, array3);
1416 }
1417
1418 #[test]
1419 fn test_run_array_eq_sliced_different_offsets() {
1420 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1421 let values1 = StringArray::from(vec!["a", "b", "c"]);
1422 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1423 let array2 = array1.clone();
1424 assert_eq!(array1, array2);
1425
1426 let slice1 = array1.slice(1, 4); let slice2 = array1.slice(1, 4);
1428 assert_eq!(slice1, slice2);
1429
1430 let slice3 = array1.slice(0, 4); assert_ne!(slice1, slice3);
1432 }
1433
1434 #[test]
1435 #[cfg(not(feature = "force_validate"))]
1436 fn allow_to_create_invalid_array_using_new_unchecked() {
1437 let valid = RunArray::<Int32Type>::from_iter(["32"]);
1438 let (_, buffer, values) = valid.into_parts();
1439
1440 let _ = unsafe {
1441 RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1443 };
1444 }
1445
1446 #[test]
1447 #[should_panic(
1448 expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
1449 )]
1450 #[cfg(feature = "force_validate")]
1451 fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
1452 {
1453 let valid = RunArray::<Int32Type>::from_iter(["32"]);
1454 let (_, buffer, values) = valid.into_parts();
1455
1456 let _ = unsafe {
1457 RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1459 };
1460 }
1461
1462 #[test]
1463 fn test_run_array_roundtrip() {
1464 let run = Int32Array::from(vec![3, 6, 9, 12]);
1465 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1466 let array = RunArray::try_new(&run, &values).unwrap();
1467
1468 let (dt, buffer, values) = array.clone().into_parts();
1469 let created_from_parts =
1470 unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
1471 assert_eq!(array, created_from_parts);
1472 }
1473}