1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26    builder::StringRunBuilder,
27    make_array,
28    run_iterator::RunArrayIter,
29    types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
30    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
31};
32
33pub struct RunArray<R: RunEndIndexType> {
64    data_type: DataType,
65    run_ends: RunEndBuffer<R::Native>,
66    values: ArrayRef,
67}
68
69impl<R: RunEndIndexType> Clone for RunArray<R> {
70    fn clone(&self) -> Self {
71        Self {
72            data_type: self.data_type.clone(),
73            run_ends: self.run_ends.clone(),
74            values: self.values.clone(),
75        }
76    }
77}
78
79impl<R: RunEndIndexType> RunArray<R> {
80    pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
83        let len = run_ends.len();
84        if len == 0 {
85            return 0;
86        }
87        run_ends.value(len - 1).as_usize()
88    }
89
90    pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
94        let run_ends_type = run_ends.data_type().clone();
95        let values_type = values.data_type().clone();
96        let ree_array_type = DataType::RunEndEncoded(
97            Arc::new(Field::new("run_ends", run_ends_type, false)),
98            Arc::new(Field::new("values", values_type, true)),
99        );
100        let len = RunArray::logical_len(run_ends);
101        let builder = ArrayDataBuilder::new(ree_array_type)
102            .len(len)
103            .add_child_data(run_ends.to_data())
104            .add_child_data(values.to_data());
105
106        let array_data = unsafe { builder.build_unchecked() };
108
109        array_data.validate_data()?;
116
117        Ok(array_data.into())
118    }
119
120    pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
122        &self.run_ends
123    }
124
125    pub fn values(&self) -> &ArrayRef {
130        &self.values
131    }
132
133    pub fn get_start_physical_index(&self) -> usize {
135        self.run_ends.get_start_physical_index()
136    }
137
138    pub fn get_end_physical_index(&self) -> usize {
140        self.run_ends.get_end_physical_index()
141    }
142
143    pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
157        let values = self.values.as_any().downcast_ref()?;
158        Some(TypedRunArray {
159            run_array: self,
160            values,
161        })
162    }
163
164    pub fn get_physical_index(&self, logical_index: usize) -> usize {
170        self.run_ends.get_physical_index(logical_index)
171    }
172
173    #[inline]
181    pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
182    where
183        I: ArrowNativeType,
184    {
185        let len = self.run_ends().len();
186        let offset = self.run_ends().offset();
187
188        let indices_len = logical_indices.len();
189
190        if indices_len == 0 {
191            return Ok(vec![]);
192        }
193
194        let mut ordered_indices: Vec<usize> = (0..indices_len).collect();
197
198        ordered_indices.sort_unstable_by(|lhs, rhs| {
201            logical_indices[*lhs]
202                .partial_cmp(&logical_indices[*rhs])
203                .unwrap()
204        });
205
206        let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
208        if largest_logical_index >= len {
209            return Err(ArrowError::InvalidArgumentError(format!(
210                "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
211            )));
212        }
213
214        let skip_value = self.get_start_physical_index();
216
217        let mut physical_indices = vec![0; indices_len];
218
219        let mut ordered_index = 0_usize;
220        for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
221        {
222            let run_end_value = run_end.as_usize() - offset;
224
225            while ordered_index < indices_len
228                && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
229            {
230                physical_indices[ordered_indices[ordered_index]] = physical_index;
231                ordered_index += 1;
232            }
233        }
234
235        if ordered_index < logical_indices.len() {
238            let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
239            return Err(ArrowError::InvalidArgumentError(format!(
240                "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
241            )));
242        }
243        Ok(physical_indices)
244    }
245
246    pub fn slice(&self, offset: usize, length: usize) -> Self {
248        Self {
249            data_type: self.data_type.clone(),
250            run_ends: self.run_ends.slice(offset, length),
251            values: self.values.clone(),
252        }
253    }
254}
255
256impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
257    fn from(data: ArrayData) -> Self {
259        match data.data_type() {
260            DataType::RunEndEncoded(_, _) => {}
261            _ => {
262                panic!("Invalid data type for RunArray. The data type should be DataType::RunEndEncoded");
263            }
264        }
265
266        let child = &data.child_data()[0];
269        assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
270        let run_ends = unsafe {
271            let scalar = child.buffers()[0].clone().into();
272            RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
273        };
274
275        let values = make_array(data.child_data()[1].clone());
276        Self {
277            data_type: data.data_type().clone(),
278            run_ends,
279            values,
280        }
281    }
282}
283
284impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
285    fn from(array: RunArray<R>) -> Self {
286        let len = array.run_ends.len();
287        let offset = array.run_ends.offset();
288
289        let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
290            .len(array.run_ends.values().len())
291            .buffers(vec![array.run_ends.into_inner().into_inner()]);
292
293        let run_ends = unsafe { run_ends.build_unchecked() };
294
295        let builder = ArrayDataBuilder::new(array.data_type)
296            .len(len)
297            .offset(offset)
298            .child_data(vec![run_ends, array.values.to_data()]);
299
300        unsafe { builder.build_unchecked() }
301    }
302}
303
304impl<T: RunEndIndexType> Array for RunArray<T> {
305    fn as_any(&self) -> &dyn Any {
306        self
307    }
308
309    fn to_data(&self) -> ArrayData {
310        self.clone().into()
311    }
312
313    fn into_data(self) -> ArrayData {
314        self.into()
315    }
316
317    fn data_type(&self) -> &DataType {
318        &self.data_type
319    }
320
321    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
322        Arc::new(self.slice(offset, length))
323    }
324
325    fn len(&self) -> usize {
326        self.run_ends.len()
327    }
328
329    fn is_empty(&self) -> bool {
330        self.run_ends.is_empty()
331    }
332
333    fn shrink_to_fit(&mut self) {
334        self.run_ends.shrink_to_fit();
335        self.values.shrink_to_fit();
336    }
337
338    fn offset(&self) -> usize {
339        self.run_ends.offset()
340    }
341
342    fn nulls(&self) -> Option<&NullBuffer> {
343        None
344    }
345
346    fn logical_nulls(&self) -> Option<NullBuffer> {
347        let len = self.len();
348        let nulls = self.values.logical_nulls()?;
349        let mut out = BooleanBufferBuilder::new(len);
350        let offset = self.run_ends.offset();
351        let mut valid_start = 0;
352        let mut last_end = 0;
353        for (idx, end) in self.run_ends.values().iter().enumerate() {
354            let end = end.as_usize();
355            if end < offset {
356                continue;
357            }
358            let end = (end - offset).min(len);
359            if nulls.is_null(idx) {
360                if valid_start < last_end {
361                    out.append_n(last_end - valid_start, true);
362                }
363                out.append_n(end - last_end, false);
364                valid_start = end;
365            }
366            last_end = end;
367            if end == len {
368                break;
369            }
370        }
371        if valid_start < len {
372            out.append_n(len - valid_start, true)
373        }
374        assert_eq!(out.len(), len);
376        Some(out.finish().into())
377    }
378
379    fn is_nullable(&self) -> bool {
380        !self.is_empty() && self.values.is_nullable()
381    }
382
383    fn get_buffer_memory_size(&self) -> usize {
384        self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
385    }
386
387    fn get_array_memory_size(&self) -> usize {
388        std::mem::size_of::<Self>()
389            + self.run_ends.inner().inner().capacity()
390            + self.values.get_array_memory_size()
391    }
392}
393
394impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
395    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
396        writeln!(
397            f,
398            "RunArray {{run_ends: {:?}, values: {:?}}}",
399            self.run_ends.values(),
400            self.values
401        )
402    }
403}
404
405impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
422    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
423        let it = iter.into_iter();
424        let (lower, _) = it.size_hint();
425        let mut builder = StringRunBuilder::with_capacity(lower, 256);
426        it.for_each(|i| {
427            builder.append_option(i);
428        });
429
430        builder.finish()
431    }
432}
433
434impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
449    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
450        let it = iter.into_iter();
451        let (lower, _) = it.size_hint();
452        let mut builder = StringRunBuilder::with_capacity(lower, 256);
453        it.for_each(|i| {
454            builder.append_value(i);
455        });
456
457        builder.finish()
458    }
459}
460
461pub type Int16RunArray = RunArray<Int16Type>;
475
476pub type Int32RunArray = RunArray<Int32Type>;
490
491pub type Int64RunArray = RunArray<Int64Type>;
505
506pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
524    run_array: &'a RunArray<R>,
526
527    values: &'a V,
529}
530
531impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
533    fn clone(&self) -> Self {
534        *self
535    }
536}
537
538impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
539
540impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
541    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
542        writeln!(f, "TypedRunArray({:?})", self.run_array)
543    }
544}
545
546impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
547    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
549        self.run_array.run_ends()
550    }
551
552    pub fn values(&self) -> &'a V {
554        self.values
555    }
556
557    pub fn run_array(&self) -> &'a RunArray<R> {
559        self.run_array
560    }
561}
562
563impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
564    fn as_any(&self) -> &dyn Any {
565        self.run_array
566    }
567
568    fn to_data(&self) -> ArrayData {
569        self.run_array.to_data()
570    }
571
572    fn into_data(self) -> ArrayData {
573        self.run_array.into_data()
574    }
575
576    fn data_type(&self) -> &DataType {
577        self.run_array.data_type()
578    }
579
580    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
581        Arc::new(self.run_array.slice(offset, length))
582    }
583
584    fn len(&self) -> usize {
585        self.run_array.len()
586    }
587
588    fn is_empty(&self) -> bool {
589        self.run_array.is_empty()
590    }
591
592    fn offset(&self) -> usize {
593        self.run_array.offset()
594    }
595
596    fn nulls(&self) -> Option<&NullBuffer> {
597        self.run_array.nulls()
598    }
599
600    fn logical_nulls(&self) -> Option<NullBuffer> {
601        self.run_array.logical_nulls()
602    }
603
604    fn logical_null_count(&self) -> usize {
605        self.run_array.logical_null_count()
606    }
607
608    fn is_nullable(&self) -> bool {
609        self.run_array.is_nullable()
610    }
611
612    fn get_buffer_memory_size(&self) -> usize {
613        self.run_array.get_buffer_memory_size()
614    }
615
616    fn get_array_memory_size(&self) -> usize {
617        self.run_array.get_array_memory_size()
618    }
619}
620
621impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
624where
625    R: RunEndIndexType,
626    V: Sync + Send,
627    &'a V: ArrayAccessor,
628    <&'a V as ArrayAccessor>::Item: Default,
629{
630    type Item = <&'a V as ArrayAccessor>::Item;
631
632    fn value(&self, logical_index: usize) -> Self::Item {
633        assert!(
634            logical_index < self.len(),
635            "Trying to access an element at index {} from a TypedRunArray of length {}",
636            logical_index,
637            self.len()
638        );
639        unsafe { self.value_unchecked(logical_index) }
640    }
641
642    unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
643        let physical_index = self.run_array.get_physical_index(logical_index);
644        self.values().value_unchecked(physical_index)
645    }
646}
647
648impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
649where
650    R: RunEndIndexType,
651    V: Sync + Send,
652    &'a V: ArrayAccessor,
653    <&'a V as ArrayAccessor>::Item: Default,
654{
655    type Item = Option<<&'a V as ArrayAccessor>::Item>;
656    type IntoIter = RunArrayIter<'a, R, V>;
657
658    fn into_iter(self) -> Self::IntoIter {
659        RunArrayIter::new(self)
660    }
661}
662
663#[cfg(test)]
664mod tests {
665    use rand::rng;
666    use rand::seq::SliceRandom;
667    use rand::Rng;
668
669    use super::*;
670    use crate::builder::PrimitiveRunBuilder;
671    use crate::cast::AsArray;
672    use crate::types::{Int8Type, UInt32Type};
673    use crate::{Int32Array, StringArray};
674
675    fn build_input_array(size: usize) -> Vec<Option<i32>> {
676        let mut seed: Vec<Option<i32>> = vec![
679            None,
680            None,
681            None,
682            Some(1),
683            Some(2),
684            Some(3),
685            Some(4),
686            Some(5),
687            Some(6),
688            Some(7),
689            Some(8),
690            Some(9),
691        ];
692        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
693        let mut ix = 0;
694        let mut rng = rng();
695        let max_run_length = 8_usize.min(1_usize.max(size / 2));
697        while result.len() < size {
698            if ix == 0 {
700                seed.shuffle(&mut rng);
701            }
702            let num = max_run_length.min(rng.random_range(1..=max_run_length));
704            for _ in 0..num {
705                result.push(seed[ix]);
706            }
707            ix += 1;
708            if ix == seed.len() {
709                ix = 0
710            }
711        }
712        result.resize(size, None);
713        result
714    }
715
716    fn compare_logical_and_physical_indices(
718        logical_indices: &[u32],
719        logical_array: &[Option<i32>],
720        physical_indices: &[usize],
721        physical_array: &PrimitiveArray<Int32Type>,
722    ) {
723        assert_eq!(logical_indices.len(), physical_indices.len());
724
725        logical_indices
727            .iter()
728            .map(|f| f.as_usize())
729            .zip(physical_indices.iter())
730            .for_each(|(logical_ix, physical_ix)| {
731                let expected = logical_array[logical_ix];
732                match expected {
733                    Some(val) => {
734                        assert!(physical_array.is_valid(*physical_ix));
735                        let actual = physical_array.value(*physical_ix);
736                        assert_eq!(val, actual);
737                    }
738                    None => {
739                        assert!(physical_array.is_null(*physical_ix))
740                    }
741                };
742            });
743    }
744    #[test]
745    fn test_run_array() {
746        let value_data =
748            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
749
750        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
752        let run_ends_data =
753            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
754
755        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
757
758        assert_eq!(ree_array.len(), 22);
759        assert_eq!(ree_array.null_count(), 0);
760
761        let values = ree_array.values();
762        assert_eq!(value_data.into_data(), values.to_data());
763        assert_eq!(&DataType::Int8, values.data_type());
764
765        let run_ends = ree_array.run_ends();
766        assert_eq!(run_ends.values(), &run_ends_values);
767    }
768
769    #[test]
770    fn test_run_array_fmt_debug() {
771        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
772        builder.append_value(12345678);
773        builder.append_null();
774        builder.append_value(22345678);
775        let array = builder.finish();
776        assert_eq!(
777            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  22345678,\n]}\n",
778            format!("{array:?}")
779        );
780
781        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
782        for _ in 0..20 {
783            builder.append_value(1);
784        }
785        let array = builder.finish();
786
787        assert_eq!(array.len(), 20);
788        assert_eq!(array.null_count(), 0);
789        assert_eq!(array.logical_null_count(), 0);
790
791        assert_eq!(
792            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
793            format!("{array:?}")
794        );
795    }
796
797    #[test]
798    fn test_run_array_from_iter() {
799        let test = vec!["a", "a", "b", "c"];
800        let array: RunArray<Int16Type> = test
801            .iter()
802            .map(|&x| if x == "b" { None } else { Some(x) })
803            .collect();
804        assert_eq!(
805            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
806            format!("{array:?}")
807        );
808
809        assert_eq!(array.len(), 4);
810        assert_eq!(array.null_count(), 0);
811        assert_eq!(array.logical_null_count(), 1);
812
813        let array: RunArray<Int16Type> = test.into_iter().collect();
814        assert_eq!(
815            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
816            format!("{array:?}")
817        );
818    }
819
820    #[test]
821    fn test_run_array_run_ends_as_primitive_array() {
822        let test = vec!["a", "b", "c", "a"];
823        let array: RunArray<Int16Type> = test.into_iter().collect();
824
825        assert_eq!(array.len(), 4);
826        assert_eq!(array.null_count(), 0);
827        assert_eq!(array.logical_null_count(), 0);
828
829        let run_ends = array.run_ends();
830        assert_eq!(&[1, 2, 3, 4], run_ends.values());
831    }
832
833    #[test]
834    fn test_run_array_as_primitive_array_with_null() {
835        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
836        let array: RunArray<Int32Type> = test.into_iter().collect();
837
838        assert_eq!(array.len(), 6);
839        assert_eq!(array.null_count(), 0);
840        assert_eq!(array.logical_null_count(), 3);
841
842        let run_ends = array.run_ends();
843        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
844
845        let values_data = array.values();
846        assert_eq!(2, values_data.null_count());
847        assert_eq!(5, values_data.len());
848    }
849
850    #[test]
851    fn test_run_array_all_nulls() {
852        let test = vec![None, None, None];
853        let array: RunArray<Int32Type> = test.into_iter().collect();
854
855        assert_eq!(array.len(), 3);
856        assert_eq!(array.null_count(), 0);
857        assert_eq!(array.logical_null_count(), 3);
858
859        let run_ends = array.run_ends();
860        assert_eq!(3, run_ends.len());
861        assert_eq!(&[3], run_ends.values());
862
863        let values_data = array.values();
864        assert_eq!(1, values_data.null_count());
865    }
866
867    #[test]
868    fn test_run_array_try_new() {
869        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
870            .into_iter()
871            .collect();
872        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
873
874        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
875        assert_eq!(array.values().data_type(), &DataType::Utf8);
876
877        assert_eq!(array.null_count(), 0);
878        assert_eq!(array.logical_null_count(), 1);
879        assert_eq!(array.len(), 4);
880        assert_eq!(array.values().null_count(), 1);
881
882        assert_eq!(
883            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n  \"foo\",\n  \"bar\",\n  null,\n  \"baz\",\n]}\n",
884            format!("{array:?}")
885        );
886    }
887
888    #[test]
889    fn test_run_array_int16_type_definition() {
890        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
891        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
892        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
893        assert_eq!(array.values(), &values);
894    }
895
896    #[test]
897    fn test_run_array_empty_string() {
898        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
899        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
900        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
901        assert_eq!(array.values(), &values);
902    }
903
904    #[test]
905    fn test_run_array_length_mismatch() {
906        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
907            .into_iter()
908            .collect();
909        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
910
911        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
912        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
913        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
914    }
915
916    #[test]
917    fn test_run_array_run_ends_with_null() {
918        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
919            .into_iter()
920            .collect();
921        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
922
923        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
924        let expected = ArrowError::InvalidArgumentError(
925            "Found null values in run_ends array. The run_ends array should not have null values."
926                .to_string(),
927        );
928        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
929    }
930
931    #[test]
932    fn test_run_array_run_ends_with_zeroes() {
933        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
934            .into_iter()
935            .collect();
936        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
937
938        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
939        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
940        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
941    }
942
943    #[test]
944    fn test_run_array_run_ends_non_increasing() {
945        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
946            .into_iter()
947            .collect();
948        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
949
950        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
951        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
952        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
953    }
954
955    #[test]
956    #[should_panic(expected = "Incorrect run ends type")]
957    fn test_run_array_run_ends_data_type_mismatch() {
958        let a = RunArray::<Int32Type>::from_iter(["32"]);
959        let _ = RunArray::<Int64Type>::from(a.into_data());
960    }
961
962    #[test]
963    fn test_ree_array_accessor() {
964        let input_array = build_input_array(256);
965
966        let mut builder =
968            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
969        builder.extend(input_array.iter().copied());
970        let run_array = builder.finish();
971        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
972
973        for (i, inp_val) in input_array.iter().enumerate() {
975            if let Some(val) = inp_val {
976                let actual = typed.value(i);
977                assert_eq!(*val, actual)
978            } else {
979                let physical_ix = run_array.get_physical_index(i);
980                assert!(typed.values().is_null(physical_ix));
981            };
982        }
983    }
984
985    #[test]
986    #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
988        for logical_len in (0..250).step_by(10) {
990            let input_array = build_input_array(logical_len);
991
992            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
994            builder.extend(input_array.clone().into_iter());
995
996            let run_array = builder.finish();
997            let physical_values_array = run_array.values().as_primitive::<Int32Type>();
998
999            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1001            logical_indices.append(&mut logical_indices.clone());
1003            let mut rng = rng();
1004            logical_indices.shuffle(&mut rng);
1005
1006            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1007
1008            assert_eq!(logical_indices.len(), physical_indices.len());
1009
1010            compare_logical_and_physical_indices(
1012                &logical_indices,
1013                &input_array,
1014                &physical_indices,
1015                physical_values_array,
1016            );
1017        }
1018    }
1019
1020    #[test]
1021    #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
1023        let total_len = 80;
1024        let input_array = build_input_array(total_len);
1025
1026        let mut builder =
1028            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1029        builder.extend(input_array.iter().copied());
1030        let run_array = builder.finish();
1031        let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1032
1033        for slice_len in 1..=total_len {
1035            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1037            logical_indices.append(&mut logical_indices.clone());
1039            let mut rng = rng();
1040            logical_indices.shuffle(&mut rng);
1041
1042            let sliced_input_array = &input_array[0..slice_len];
1045
1046            let sliced_run_array: RunArray<Int16Type> =
1048                run_array.slice(0, slice_len).into_data().into();
1049
1050            let physical_indices = sliced_run_array
1052                .get_physical_indices(&logical_indices)
1053                .unwrap();
1054
1055            compare_logical_and_physical_indices(
1056                &logical_indices,
1057                sliced_input_array,
1058                &physical_indices,
1059                physical_values_array,
1060            );
1061
1062            let sliced_input_array = &input_array[total_len - slice_len..total_len];
1065
1066            let sliced_run_array: RunArray<Int16Type> = run_array
1068                .slice(total_len - slice_len, slice_len)
1069                .into_data()
1070                .into();
1071
1072            let physical_indices = sliced_run_array
1074                .get_physical_indices(&logical_indices)
1075                .unwrap();
1076
1077            compare_logical_and_physical_indices(
1078                &logical_indices,
1079                sliced_input_array,
1080                &physical_indices,
1081                physical_values_array,
1082            );
1083        }
1084    }
1085
1086    #[test]
1087    fn test_logical_nulls() {
1088        let run = Int32Array::from(vec![3, 6, 9, 12]);
1089        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1090        let array = RunArray::try_new(&run, &values).unwrap();
1091
1092        let expected = [
1093            true, true, true, false, false, false, true, true, true, false, false, false,
1094        ];
1095
1096        let n = array.logical_nulls().unwrap();
1097        assert_eq!(n.null_count(), 6);
1098
1099        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1100        for (offset, length) in slices {
1101            let a = array.slice(offset, length);
1102            let n = a.logical_nulls().unwrap();
1103            let n = n.into_iter().collect::<Vec<_>>();
1104            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1105        }
1106    }
1107}