Skip to main content

arrow_array/array/
run_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27    builder::StringRunBuilder,
28    make_array,
29    run_iterator::RunArrayIter,
30    types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33/// Recursively applies a function to the values of a RunEndEncoded array, preserving the run structure.
34///
35/// # Example
36///
37/// ```ignore
38/// let result = ree_recurse!(array, Int32Type, my_function)?;
39/// ```
40///
41/// This macro is useful for implementing functions that should work on the logical values
42/// of a REE array while preserving the run-end encoding structure.
43#[macro_export]
44macro_rules! ree_map {
45    ($array:expr, $run_type:ty, $func:expr) => {{
46        let ree = $array.as_run_opt::<$run_type>().unwrap();
47        let inner_values = $func(ree.values().as_ref())?;
48        Ok(std::sync::Arc::new(ree.with_values(inner_values)))
49    }};
50}
51
52/// An array of [run-end encoded values].
53///
54/// This encoding is variation on [run-length encoding (RLE)] and is good for representing
55/// data containing the same values repeated consecutively.
56///
57/// A [`RunArray`] consists of a `run_ends` buffer and a `values` array of equivalent
58/// lengths. The `run_ends` buffer stores the indexes at which the run ends. The
59/// `values` array stores the corresponding value of each run. The below example
60/// illustrates how a logical array is represented by a [`RunArray`]:
61///
62/// ```text
63/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
64///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
65/// │ │        A        │  │    2    │ │     │        A        │
66///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
67/// │ │        D        │  │    3    │ │     │        A        │    run length of 'A' = runs_ends[0] - 0 = 2
68///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
69/// │ │        B        │  │    6    │ │     │        D        │    run length of 'D' = run_ends[1] - run_ends[0] = 1
70///   └─────────────────┘  └─────────┘       ├─────────────────┤
71/// │        values          run_ends  │     │        B        │
72///                                          ├─────────────────┤
73/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │
74///                                          ├─────────────────┤
75///                RunArray                  │        B        │    run length of 'B' = run_ends[2] - run_ends[1] = 3
76///               length = 3                 └─────────────────┘
77///
78///                                             Logical array
79///                                                Contents
80/// ```
81///
82/// [run-end encoded values]: https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
83/// [run-length encoding (RLE)]: https://en.wikipedia.org/wiki/Run-length_encoding
84pub struct RunArray<R: RunEndIndexType> {
85    data_type: DataType,
86    run_ends: RunEndBuffer<R::Native>,
87    values: ArrayRef,
88}
89
90impl<R: RunEndIndexType> Clone for RunArray<R> {
91    fn clone(&self) -> Self {
92        Self {
93            data_type: self.data_type.clone(),
94            run_ends: self.run_ends.clone(),
95            values: self.values.clone(),
96        }
97    }
98}
99
100impl<R: RunEndIndexType> RunArray<R> {
101    /// Calculates the logical length of the array encoded by treating the `run_ends`
102    /// array as if it were a [`RunEndBuffer`].
103    pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
104        let len = run_ends.len();
105        if len == 0 {
106            return 0;
107        }
108        run_ends.value(len - 1).as_usize()
109    }
110
111    /// Attempts to create a [`RunArray`] using the given `run_ends` and `values`.
112    ///
113    /// # Errors
114    ///
115    /// - If `run_ends` and `values` have different lengths
116    /// - If `run_ends` has any null values
117    /// - If `run_ends` doesn't consist of strictly increasing positive integers
118    pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
119        let run_ends_type = run_ends.data_type().clone();
120        let values_type = values.data_type().clone();
121        let ree_array_type = DataType::RunEndEncoded(
122            Arc::new(Field::new("run_ends", run_ends_type, false)),
123            Arc::new(Field::new("values", values_type, true)),
124        );
125        let len = RunArray::logical_len(run_ends);
126        let builder = ArrayDataBuilder::new(ree_array_type)
127            .len(len)
128            .add_child_data(run_ends.to_data())
129            .add_child_data(values.to_data());
130
131        // `build_unchecked` is used to avoid recursive validation of child arrays.
132        let array_data = unsafe { builder.build_unchecked() };
133
134        // Safety: `validate_data` checks below
135        //    1. The given array data has exactly two child arrays.
136        //    2. The first child array (run_ends) has valid data type.
137        //    3. run_ends array does not have null values
138        //    4. run_ends array has non-zero and strictly increasing values.
139        //    5. The length of run_ends array and values array are the same.
140        array_data.validate_data()?;
141
142        Ok(array_data.into())
143    }
144
145    /// Create a new [`RunArray`] from the provided parts, without validation
146    ///
147    /// # Safety
148    ///
149    /// Safe if [`Self::try_new`] would not error
150    pub unsafe fn new_unchecked(
151        data_type: DataType,
152        run_ends: RunEndBuffer<R::Native>,
153        values: ArrayRef,
154    ) -> Self {
155        if cfg!(feature = "force_validate") {
156            match &data_type {
157                DataType::RunEndEncoded(run_ends, values_field) => {
158                    assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
159                    assert_eq!(
160                        run_ends.data_type(),
161                        &R::DATA_TYPE,
162                        "Incorrect run ends type"
163                    );
164                    assert_eq!(
165                        values_field.data_type(),
166                        values.data_type(),
167                        "Incorrect values type"
168                    );
169                }
170                _ => {
171                    panic!(
172                        "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
173                    );
174                }
175            }
176
177            let run_array = Self {
178                data_type,
179                run_ends,
180                values,
181            };
182
183            // Safety: `validate_data` checks below
184            //    1. The given array data has exactly two child arrays.
185            //    2. The first child array (run_ends) has valid data type.
186            //    3. run_ends array does not have null values
187            //    4. run_ends array has non-zero and strictly increasing values.
188            //    5. The length of run_ends array and values array are the same.
189            run_array
190                .to_data()
191                .validate_data()
192                .expect("RunArray data should be valid");
193
194            return run_array;
195        }
196
197        Self {
198            data_type,
199            run_ends,
200            values,
201        }
202    }
203
204    /// Deconstruct this array into its constituent parts
205    pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
206        (self.data_type, self.run_ends, self.values)
207    }
208
209    /// Returns a reference to the [`RunEndBuffer`].
210    pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
211        &self.run_ends
212    }
213
214    /// Returns a reference to the values array.
215    ///
216    /// Any slicing of this [`RunArray`] array is **not** applied to the returned
217    /// values here and must be handled separately.
218    pub fn values(&self) -> &ArrayRef {
219        &self.values
220    }
221
222    /// Returns a new [`RunArray`] with the same `run_ends` and the supplied `values`.
223    ///
224    /// # Panics
225    ///
226    /// Panics if `values.len()` does not equal `self.values().len()`.
227    ///
228    /// # Example
229    ///
230    /// ```
231    /// # use std::sync::Arc;
232    /// # use arrow_array::{RunArray, Int32Array, StringArray, ArrayRef,Array};
233    /// # use arrow_array::types::Int32Type;
234    /// // A RunArray logically representing ["a", "a", "b", "c", "c"]
235    /// let run_ends = Int32Array::from(vec![2, 3, 5]);
236    /// let values: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
237    /// let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
238    ///
239    /// // Swap in new values while keeping the same run pattern.
240    /// // The result logically represents ["x", "x", "y", "z", "z"].
241    /// let new_values: ArrayRef = Arc::new(StringArray::from(vec!["x", "y", "z"]));
242    /// let new_run_array = run_array.with_values(new_values);
243    ///
244    /// assert_eq!(new_run_array.len(), 5);
245    /// assert_eq!(new_run_array.run_ends().values(), &[2, 3, 5]);
246    /// ```
247    pub fn with_values(&self, values: ArrayRef) -> Self {
248        assert_eq!(values.len(), self.values().len());
249        let (run_ends_field, values_field) = match &self.data_type {
250            DataType::RunEndEncoded(r, v) => (r, v),
251            _ => unreachable!("RunArray should have type RunEndEncoded"),
252        };
253        let data_type =
254            DataType::RunEndEncoded(Arc::clone(run_ends_field), Arc::clone(values_field));
255        Self {
256            data_type,
257            run_ends: self.run_ends.clone(),
258            values,
259        }
260    }
261
262    /// Similar to [`values`] but accounts for logical slicing, returning only the values
263    /// that are part of the logical slice of this array.
264    ///
265    /// [`values`]: Self::values
266    pub fn values_slice(&self) -> ArrayRef {
267        if self.is_empty() {
268            return self.values.slice(0, 0);
269        }
270        let start = self.get_start_physical_index();
271        let end = self.get_end_physical_index();
272        self.values.slice(start, end - start + 1)
273    }
274
275    /// Returns the physical index at which the array slice starts.
276    ///
277    /// See [`RunEndBuffer::get_start_physical_index`].
278    pub fn get_start_physical_index(&self) -> usize {
279        self.run_ends.get_start_physical_index()
280    }
281
282    /// Returns the physical index at which the array slice ends.
283    ///
284    /// See [`RunEndBuffer::get_end_physical_index`].
285    pub fn get_end_physical_index(&self) -> usize {
286        self.run_ends.get_end_physical_index()
287    }
288
289    /// Downcast this [`RunArray`] to a [`TypedRunArray`]
290    ///
291    /// ```
292    /// use arrow_array::{Array, ArrayAccessor, RunArray, StringArray, types::Int32Type};
293    ///
294    /// let orig = [Some("a"), Some("b"), None];
295    /// let run_array = RunArray::<Int32Type>::from_iter(orig);
296    /// let typed = run_array.downcast::<StringArray>().unwrap();
297    /// assert_eq!(typed.value(0), "a");
298    /// assert_eq!(typed.value(1), "b");
299    /// assert!(typed.values().is_null(2));
300    /// ```
301    pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
302        let values = self.values.as_any().downcast_ref()?;
303        Some(TypedRunArray {
304            run_array: self,
305            values,
306        })
307    }
308
309    /// Calls [`RunEndBuffer::get_physical_index`].
310    ///
311    /// The result is arbitrary if `logical_index >= self.len()`
312    pub fn get_physical_index(&self, logical_index: usize) -> usize {
313        self.run_ends.get_physical_index(logical_index)
314    }
315
316    /// Returns the physical indices corresponding to the provided logical indices.
317    ///
318    /// See [`RunEndBuffer::get_physical_indices`] for more details.
319    #[inline]
320    pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
321    where
322        I: ArrowNativeType,
323    {
324        self.run_ends()
325            .get_physical_indices(logical_indices)
326            .map_err(|index| {
327                ArrowError::InvalidArgumentError(format!(
328                    "Logical index {} is out of bounds for RunArray of length {}",
329                    index.as_usize(),
330                    self.len()
331                ))
332            })
333    }
334
335    /// Returns a zero-copy slice of this array with the indicated offset and length.
336    ///
337    /// # Panics
338    ///
339    /// - Specified slice (`offset` + `length`) exceeds existing length
340    pub fn slice(&self, offset: usize, length: usize) -> Self {
341        Self {
342            data_type: self.data_type.clone(),
343            run_ends: self.run_ends.slice(offset, length),
344            values: self.values.clone(),
345        }
346    }
347}
348
349impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
350    // The method assumes the caller already validated the data using `ArrayData::validate_data()`
351    fn from(data: ArrayData) -> Self {
352        let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
353
354        match &data_type {
355            DataType::RunEndEncoded(_, _) => {}
356            _ => {
357                panic!(
358                    "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
359                );
360            }
361        }
362
363        let [run_end_child, values_child]: [ArrayData; 2] = child_data
364            .try_into()
365            .expect("RunArray data should have exactly two child arrays");
366
367        // deconstruct the run ends child array
368        let (
369            run_end_data_type,
370            _run_end_len,
371            _run_end_nulls,
372            _run_end_offset,
373            run_end_buffers,
374            _run_end_child_data,
375        ) = run_end_child.into_parts();
376        assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
377        let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
378            .try_into()
379            .expect("Run ends should have exactly one buffer");
380        let scalar = ScalarBuffer::from(run_end_buffer);
381        let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
382
383        let values = make_array(values_child);
384
385        Self {
386            data_type,
387            run_ends,
388            values,
389        }
390    }
391}
392
393impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
394    fn from(array: RunArray<R>) -> Self {
395        let len = array.run_ends.len();
396        let offset = array.run_ends.offset();
397
398        let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
399            .len(array.run_ends.values().len())
400            .buffers(vec![array.run_ends.into_inner().into_inner()]);
401
402        let run_ends = unsafe { run_ends.build_unchecked() };
403
404        let builder = ArrayDataBuilder::new(array.data_type)
405            .len(len)
406            .offset(offset)
407            .child_data(vec![run_ends, array.values.to_data()]);
408
409        unsafe { builder.build_unchecked() }
410    }
411}
412
413/// SAFETY: Correctly implements the contract of Arrow Arrays
414unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
415    fn as_any(&self) -> &dyn Any {
416        self
417    }
418
419    fn to_data(&self) -> ArrayData {
420        self.clone().into()
421    }
422
423    fn into_data(self) -> ArrayData {
424        self.into()
425    }
426
427    fn data_type(&self) -> &DataType {
428        &self.data_type
429    }
430
431    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
432        Arc::new(self.slice(offset, length))
433    }
434
435    fn len(&self) -> usize {
436        self.run_ends.len()
437    }
438
439    fn is_empty(&self) -> bool {
440        self.run_ends.is_empty()
441    }
442
443    fn shrink_to_fit(&mut self) {
444        self.run_ends.shrink_to_fit();
445        self.values.shrink_to_fit();
446    }
447
448    fn offset(&self) -> usize {
449        self.run_ends.offset()
450    }
451
452    fn nulls(&self) -> Option<&NullBuffer> {
453        None
454    }
455
456    fn logical_nulls(&self) -> Option<NullBuffer> {
457        let len = self.len();
458        let nulls = self.values.logical_nulls()?;
459        let mut out = BooleanBufferBuilder::new(len);
460        let offset = self.run_ends.offset();
461        let mut valid_start = 0;
462        let mut last_end = 0;
463        for (idx, end) in self.run_ends.values().iter().enumerate() {
464            let end = end.as_usize();
465            if end < offset {
466                continue;
467            }
468            let end = (end - offset).min(len);
469            if nulls.is_null(idx) {
470                if valid_start < last_end {
471                    out.append_n(last_end - valid_start, true);
472                }
473                out.append_n(end - last_end, false);
474                valid_start = end;
475            }
476            last_end = end;
477            if end == len {
478                break;
479            }
480        }
481        if valid_start < len {
482            out.append_n(len - valid_start, true)
483        }
484        // Sanity check
485        assert_eq!(out.len(), len);
486        Some(out.finish().into())
487    }
488
489    fn is_nullable(&self) -> bool {
490        !self.is_empty() && self.values.is_nullable()
491    }
492
493    fn get_buffer_memory_size(&self) -> usize {
494        self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
495    }
496
497    fn get_array_memory_size(&self) -> usize {
498        std::mem::size_of::<Self>()
499            + self.run_ends.inner().inner().capacity()
500            + self.values.get_array_memory_size()
501    }
502
503    #[cfg(feature = "pool")]
504    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
505        self.run_ends.claim(pool);
506        self.values.claim(pool);
507    }
508}
509
510impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
511    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
512        writeln!(
513            f,
514            "RunArray {{run_ends: {:?}, values: {:?}}}",
515            self.run_ends.values(),
516            self.values
517        )
518    }
519}
520
521/// Constructs a `RunArray` from an iterator of optional strings.
522///
523/// # Example:
524/// ```
525/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
526///
527/// let test = vec!["a", "a", "b", "c", "c"];
528/// let array: RunArray<Int16Type> = test
529///     .iter()
530///     .map(|&x| if x == "b" { None } else { Some(x) })
531///     .collect();
532/// assert_eq!(
533///     "RunArray {run_ends: [2, 3, 5], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
534///     format!("{:?}", array)
535/// );
536/// ```
537impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
538    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
539        let it = iter.into_iter();
540        let (lower, _) = it.size_hint();
541        let mut builder = StringRunBuilder::with_capacity(lower, 256);
542        it.for_each(|i| {
543            builder.append_option(i);
544        });
545
546        builder.finish()
547    }
548}
549
550/// Constructs a `RunArray` from an iterator of strings.
551///
552/// # Example:
553///
554/// ```
555/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
556///
557/// let test = vec!["a", "a", "b", "c"];
558/// let array: RunArray<Int16Type> = test.into_iter().collect();
559/// assert_eq!(
560///     "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
561///     format!("{:?}", array)
562/// );
563/// ```
564impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
565    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
566        let it = iter.into_iter();
567        let (lower, _) = it.size_hint();
568        let mut builder = StringRunBuilder::with_capacity(lower, 256);
569        it.for_each(|i| {
570            builder.append_value(i);
571        });
572
573        builder.finish()
574    }
575}
576
577///
578/// A [`RunArray`] with `i16` run ends
579///
580/// # Example: Using `collect`
581/// ```
582/// # use arrow_array::{Array, Int16RunArray, Int16Array, StringArray};
583/// # use std::sync::Arc;
584///
585/// let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
586/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
587/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
588/// assert_eq!(array.values(), &values);
589/// ```
590pub type Int16RunArray = RunArray<Int16Type>;
591
592///
593/// A [`RunArray`] with `i32` run ends
594///
595/// # Example: Using `collect`
596/// ```
597/// # use arrow_array::{Array, Int32RunArray, Int32Array, StringArray};
598/// # use std::sync::Arc;
599///
600/// let array: Int32RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
601/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
602/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
603/// assert_eq!(array.values(), &values);
604/// ```
605pub type Int32RunArray = RunArray<Int32Type>;
606
607///
608/// A [`RunArray`] with `i64` run ends
609///
610/// # Example: Using `collect`
611/// ```
612/// # use arrow_array::{Array, Int64RunArray, Int64Array, StringArray};
613/// # use std::sync::Arc;
614///
615/// let array: Int64RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
616/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
617/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
618/// assert_eq!(array.values(), &values);
619/// ```
620pub type Int64RunArray = RunArray<Int64Type>;
621
622/// A [`RunArray`] typed typed on its child values array
623///
624/// Implements [`ArrayAccessor`] and [`IntoIterator`] allowing fast access to its elements
625///
626/// ```
627/// use arrow_array::{RunArray, StringArray, types::Int32Type};
628///
629/// let orig = ["a", "b", "a", "b"];
630/// let ree_array = RunArray::<Int32Type>::from_iter(orig);
631///
632/// // `TypedRunArray` allows you to access the values directly
633/// let typed = ree_array.downcast::<StringArray>().unwrap();
634///
635/// for (maybe_val, orig) in typed.into_iter().zip(orig) {
636///     assert_eq!(maybe_val.unwrap(), orig)
637/// }
638/// ```
639pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
640    /// The run array
641    run_array: &'a RunArray<R>,
642
643    /// The values of the run_array
644    values: &'a V,
645}
646
647// Manually implement `Clone` to avoid `V: Clone` type constraint
648impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
649    fn clone(&self) -> Self {
650        *self
651    }
652}
653
654impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
655
656impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
657    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
658        writeln!(f, "TypedRunArray({:?})", self.run_array)
659    }
660}
661
662impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
663    /// Returns the run_ends of this [`TypedRunArray`]
664    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
665        self.run_array.run_ends()
666    }
667
668    /// Returns the values of this [`TypedRunArray`]
669    pub fn values(&self) -> &'a V {
670        self.values
671    }
672
673    /// Returns the run array of this [`TypedRunArray`]
674    pub fn run_array(&self) -> &'a RunArray<R> {
675        self.run_array
676    }
677}
678
679/// SAFETY: Correctly implements the contract of Arrow Arrays
680unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
681    fn as_any(&self) -> &dyn Any {
682        self.run_array
683    }
684
685    fn to_data(&self) -> ArrayData {
686        self.run_array.to_data()
687    }
688
689    fn into_data(self) -> ArrayData {
690        self.run_array.into_data()
691    }
692
693    fn data_type(&self) -> &DataType {
694        self.run_array.data_type()
695    }
696
697    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
698        Arc::new(self.run_array.slice(offset, length))
699    }
700
701    fn len(&self) -> usize {
702        self.run_array.len()
703    }
704
705    fn is_empty(&self) -> bool {
706        self.run_array.is_empty()
707    }
708
709    fn offset(&self) -> usize {
710        self.run_array.offset()
711    }
712
713    fn nulls(&self) -> Option<&NullBuffer> {
714        self.run_array.nulls()
715    }
716
717    fn logical_nulls(&self) -> Option<NullBuffer> {
718        self.run_array.logical_nulls()
719    }
720
721    fn logical_null_count(&self) -> usize {
722        self.run_array.logical_null_count()
723    }
724
725    fn is_nullable(&self) -> bool {
726        self.run_array.is_nullable()
727    }
728
729    fn get_buffer_memory_size(&self) -> usize {
730        self.run_array.get_buffer_memory_size()
731    }
732
733    fn get_array_memory_size(&self) -> usize {
734        self.run_array.get_array_memory_size()
735    }
736
737    #[cfg(feature = "pool")]
738    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
739        self.run_array.claim(pool);
740    }
741}
742
743// Array accessor converts the index of logical array to the index of the physical array
744// using binary search. The time complexity is O(log N) where N is number of runs.
745impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
746where
747    R: RunEndIndexType,
748    V: Sync + Send,
749    &'a V: ArrayAccessor,
750    <&'a V as ArrayAccessor>::Item: Default,
751{
752    type Item = <&'a V as ArrayAccessor>::Item;
753
754    fn value(&self, logical_index: usize) -> Self::Item {
755        assert!(
756            logical_index < self.len(),
757            "Trying to access an element at index {} from a TypedRunArray of length {}",
758            logical_index,
759            self.len()
760        );
761        unsafe { self.value_unchecked(logical_index) }
762    }
763
764    unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
765        let physical_index = self.run_array.get_physical_index(logical_index);
766        unsafe { self.values().value_unchecked(physical_index) }
767    }
768}
769
770impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
771where
772    R: RunEndIndexType,
773    V: Sync + Send,
774    &'a V: ArrayAccessor,
775    <&'a V as ArrayAccessor>::Item: Default,
776{
777    type Item = Option<<&'a V as ArrayAccessor>::Item>;
778    type IntoIter = RunArrayIter<'a, R, V>;
779
780    fn into_iter(self) -> Self::IntoIter {
781        RunArrayIter::new(self)
782    }
783}
784
785#[cfg(test)]
786mod tests {
787    use rand::Rng;
788    use rand::rng;
789    use rand::seq::SliceRandom;
790
791    use super::*;
792    use crate::builder::PrimitiveRunBuilder;
793    use crate::cast::AsArray;
794    use crate::new_empty_array;
795    use crate::types::{Int8Type, UInt32Type};
796    use crate::{Int16Array, Int32Array, StringArray};
797
798    fn build_input_array(size: usize) -> Vec<Option<i32>> {
799        // The input array is created by shuffling and repeating
800        // the seed values random number of times.
801        let mut seed: Vec<Option<i32>> = vec![
802            None,
803            None,
804            None,
805            Some(1),
806            Some(2),
807            Some(3),
808            Some(4),
809            Some(5),
810            Some(6),
811            Some(7),
812            Some(8),
813            Some(9),
814        ];
815        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
816        let mut ix = 0;
817        let mut rng = rng();
818        // run length can go up to 8. Cap the max run length for smaller arrays to size / 2.
819        let max_run_length = 8_usize.min(1_usize.max(size / 2));
820        while result.len() < size {
821            // shuffle the seed array if all the values are iterated.
822            if ix == 0 {
823                seed.shuffle(&mut rng);
824            }
825            // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
826            let num = max_run_length.min(rng.random_range(1..=max_run_length));
827            for _ in 0..num {
828                result.push(seed[ix]);
829            }
830            ix += 1;
831            if ix == seed.len() {
832                ix = 0
833            }
834        }
835        result.resize(size, None);
836        result
837    }
838
839    // Asserts that `logical_array[logical_indices[*]] == physical_array[physical_indices[*]]`
840    fn compare_logical_and_physical_indices(
841        logical_indices: &[u32],
842        logical_array: &[Option<i32>],
843        physical_indices: &[usize],
844        physical_array: &PrimitiveArray<Int32Type>,
845    ) {
846        assert_eq!(logical_indices.len(), physical_indices.len());
847
848        // check value in logical index in the logical_array matches physical index in physical_array
849        logical_indices
850            .iter()
851            .map(|f| f.as_usize())
852            .zip(physical_indices.iter())
853            .for_each(|(logical_ix, physical_ix)| {
854                let expected = logical_array[logical_ix];
855                match expected {
856                    Some(val) => {
857                        assert!(physical_array.is_valid(*physical_ix));
858                        let actual = physical_array.value(*physical_ix);
859                        assert_eq!(val, actual);
860                    }
861                    None => {
862                        assert!(physical_array.is_null(*physical_ix))
863                    }
864                };
865            });
866    }
867    #[test]
868    fn test_run_array() {
869        // Construct a value array
870        let value_data =
871            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
872
873        // Construct a run_ends array:
874        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
875        let run_ends_data =
876            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
877
878        // Construct a run ends encoded array from the above two
879        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
880
881        assert_eq!(ree_array.len(), 22);
882        assert_eq!(ree_array.null_count(), 0);
883
884        let values = ree_array.values();
885        assert_eq!(value_data.into_data(), values.to_data());
886        assert_eq!(&DataType::Int8, values.data_type());
887
888        let run_ends = ree_array.run_ends();
889        assert_eq!(run_ends.values(), &run_ends_values);
890    }
891
892    #[test]
893    fn test_run_array_empty() {
894        let runs = new_empty_array(&DataType::Int16);
895        let runs = runs.as_primitive::<Int16Type>();
896        let values = new_empty_array(&DataType::Int64);
897        let array = RunArray::try_new(runs, &values).unwrap();
898
899        fn assertions(array: &RunArray<Int16Type>) {
900            assert!(array.is_empty());
901            assert_eq!(array.get_start_physical_index(), 0);
902            assert_eq!(array.get_end_physical_index(), 0);
903            assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
904            assert!(array.run_ends().is_empty());
905            assert_eq!(array.run_ends().sliced_values().count(), 0);
906        }
907
908        assertions(&array);
909        assertions(&array.slice(0, 0));
910    }
911
912    #[test]
913    fn test_run_array_fmt_debug() {
914        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
915        builder.append_value(12345678);
916        builder.append_null();
917        builder.append_value(22345678);
918        let array = builder.finish();
919        assert_eq!(
920            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  22345678,\n]}\n",
921            format!("{array:?}")
922        );
923
924        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
925        for _ in 0..20 {
926            builder.append_value(1);
927        }
928        let array = builder.finish();
929
930        assert_eq!(array.len(), 20);
931        assert_eq!(array.null_count(), 0);
932        assert_eq!(array.logical_null_count(), 0);
933
934        assert_eq!(
935            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
936            format!("{array:?}")
937        );
938    }
939
940    #[test]
941    fn test_run_array_from_iter() {
942        let test = vec!["a", "a", "b", "c"];
943        let array: RunArray<Int16Type> = test
944            .iter()
945            .map(|&x| if x == "b" { None } else { Some(x) })
946            .collect();
947        assert_eq!(
948            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
949            format!("{array:?}")
950        );
951
952        assert_eq!(array.len(), 4);
953        assert_eq!(array.null_count(), 0);
954        assert_eq!(array.logical_null_count(), 1);
955
956        let array: RunArray<Int16Type> = test.into_iter().collect();
957        assert_eq!(
958            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
959            format!("{array:?}")
960        );
961    }
962
963    #[test]
964    fn test_run_array_run_ends_as_primitive_array() {
965        let test = vec!["a", "b", "c", "a"];
966        let array: RunArray<Int16Type> = test.into_iter().collect();
967
968        assert_eq!(array.len(), 4);
969        assert_eq!(array.null_count(), 0);
970        assert_eq!(array.logical_null_count(), 0);
971
972        let run_ends = array.run_ends();
973        assert_eq!(&[1, 2, 3, 4], run_ends.values());
974    }
975
976    #[test]
977    fn test_run_array_as_primitive_array_with_null() {
978        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
979        let array: RunArray<Int32Type> = test.into_iter().collect();
980
981        assert_eq!(array.len(), 6);
982        assert_eq!(array.null_count(), 0);
983        assert_eq!(array.logical_null_count(), 3);
984
985        let run_ends = array.run_ends();
986        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
987
988        let values_data = array.values();
989        assert_eq!(2, values_data.null_count());
990        assert_eq!(5, values_data.len());
991    }
992
993    #[test]
994    fn test_run_array_all_nulls() {
995        let test = vec![None, None, None];
996        let array: RunArray<Int32Type> = test.into_iter().collect();
997
998        assert_eq!(array.len(), 3);
999        assert_eq!(array.null_count(), 0);
1000        assert_eq!(array.logical_null_count(), 3);
1001
1002        let run_ends = array.run_ends();
1003        assert_eq!(3, run_ends.len());
1004        assert_eq!(&[3], run_ends.values());
1005
1006        let values_data = array.values();
1007        assert_eq!(1, values_data.null_count());
1008    }
1009
1010    #[test]
1011    fn test_run_array_try_new() {
1012        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1013            .into_iter()
1014            .collect();
1015        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
1016
1017        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1018        assert_eq!(array.values().data_type(), &DataType::Utf8);
1019
1020        assert_eq!(array.null_count(), 0);
1021        assert_eq!(array.logical_null_count(), 1);
1022        assert_eq!(array.len(), 4);
1023        assert_eq!(array.values().null_count(), 1);
1024
1025        assert_eq!(
1026            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n  \"foo\",\n  \"bar\",\n  null,\n  \"baz\",\n]}\n",
1027            format!("{array:?}")
1028        );
1029    }
1030
1031    #[test]
1032    fn test_run_array_int16_type_definition() {
1033        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
1034        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1035        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
1036        assert_eq!(array.values(), &values);
1037    }
1038
1039    #[test]
1040    fn test_run_array_empty_string() {
1041        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
1042        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
1043        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
1044        assert_eq!(array.values(), &values);
1045    }
1046
1047    #[test]
1048    fn test_run_array_length_mismatch() {
1049        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1050            .into_iter()
1051            .collect();
1052        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1053
1054        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1055        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
1056        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1057    }
1058
1059    #[test]
1060    fn test_run_array_run_ends_with_null() {
1061        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1062            .into_iter()
1063            .collect();
1064        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
1065
1066        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1067        let expected = ArrowError::InvalidArgumentError(
1068            "Found null values in run_ends array. The run_ends array should not have null values."
1069                .to_string(),
1070        );
1071        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1072    }
1073
1074    #[test]
1075    fn test_run_array_run_ends_with_zeroes() {
1076        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1077            .into_iter()
1078            .collect();
1079        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
1080
1081        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1082        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
1083        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1084    }
1085
1086    #[test]
1087    fn test_run_array_run_ends_non_increasing() {
1088        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1089            .into_iter()
1090            .collect();
1091        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
1092
1093        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1094        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
1095        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1096    }
1097
1098    #[test]
1099    #[should_panic(expected = "Incorrect run ends type")]
1100    fn test_run_array_run_ends_data_type_mismatch() {
1101        let a = RunArray::<Int32Type>::from_iter(["32"]);
1102        let _ = RunArray::<Int64Type>::from(a.into_data());
1103    }
1104
1105    #[test]
1106    fn test_ree_array_accessor() {
1107        let input_array = build_input_array(256);
1108
1109        // Encode the input_array to ree_array
1110        let mut builder =
1111            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1112        builder.extend(input_array.iter().copied());
1113        let run_array = builder.finish();
1114        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
1115
1116        // Access every index and check if the value in the input array matches returned value.
1117        for (i, inp_val) in input_array.iter().enumerate() {
1118            if let Some(val) = inp_val {
1119                let actual = typed.value(i);
1120                assert_eq!(*val, actual)
1121            } else {
1122                let physical_ix = run_array.get_physical_index(i);
1123                assert!(typed.values().is_null(physical_ix));
1124            };
1125        }
1126    }
1127
1128    #[test]
1129    #[cfg_attr(miri, ignore)] // Takes too long
1130    fn test_get_physical_indices() {
1131        // Test for logical lengths starting from 10 to 250 increasing by 10
1132        for logical_len in (0..250).step_by(10) {
1133            let input_array = build_input_array(logical_len);
1134
1135            // create run array using input_array
1136            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1137            builder.extend(input_array.clone().into_iter());
1138
1139            let run_array = builder.finish();
1140            let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1141
1142            // create an array consisting of all the indices repeated twice and shuffled.
1143            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1144            // add same indices once more
1145            logical_indices.append(&mut logical_indices.clone());
1146            let mut rng = rng();
1147            logical_indices.shuffle(&mut rng);
1148
1149            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1150
1151            assert_eq!(logical_indices.len(), physical_indices.len());
1152
1153            // check value in logical index in the input_array matches physical index in typed_run_array
1154            compare_logical_and_physical_indices(
1155                &logical_indices,
1156                &input_array,
1157                &physical_indices,
1158                physical_values_array,
1159            );
1160        }
1161    }
1162
1163    #[test]
1164    #[cfg_attr(miri, ignore)] // Takes too long
1165    fn test_get_physical_indices_sliced() {
1166        let total_len = 80;
1167        let input_array = build_input_array(total_len);
1168
1169        // Encode the input_array to run array
1170        let mut builder =
1171            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1172        builder.extend(input_array.iter().copied());
1173        let run_array = builder.finish();
1174        let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1175
1176        // test for all slice lengths.
1177        for slice_len in 1..=total_len {
1178            // create an array consisting of all the indices repeated twice and shuffled.
1179            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1180            // add same indices once more
1181            logical_indices.append(&mut logical_indices.clone());
1182            let mut rng = rng();
1183            logical_indices.shuffle(&mut rng);
1184
1185            // test for offset = 0 and slice length = slice_len
1186            // slice the input array using which the run array was built.
1187            let sliced_input_array = &input_array[0..slice_len];
1188
1189            // slice the run array
1190            let sliced_run_array: RunArray<Int16Type> =
1191                run_array.slice(0, slice_len).into_data().into();
1192
1193            // Get physical indices.
1194            let physical_indices = sliced_run_array
1195                .get_physical_indices(&logical_indices)
1196                .unwrap();
1197
1198            compare_logical_and_physical_indices(
1199                &logical_indices,
1200                sliced_input_array,
1201                &physical_indices,
1202                physical_values_array,
1203            );
1204
1205            // test for offset = total_len - slice_len and slice length = slice_len
1206            // slice the input array using which the run array was built.
1207            let sliced_input_array = &input_array[total_len - slice_len..total_len];
1208
1209            // slice the run array
1210            let sliced_run_array: RunArray<Int16Type> = run_array
1211                .slice(total_len - slice_len, slice_len)
1212                .into_data()
1213                .into();
1214
1215            // Get physical indices
1216            let physical_indices = sliced_run_array
1217                .get_physical_indices(&logical_indices)
1218                .unwrap();
1219
1220            compare_logical_and_physical_indices(
1221                &logical_indices,
1222                sliced_input_array,
1223                &physical_indices,
1224                physical_values_array,
1225            );
1226        }
1227    }
1228
1229    #[test]
1230    fn test_logical_nulls() {
1231        let run = Int32Array::from(vec![3, 6, 9, 12]);
1232        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1233        let array = RunArray::try_new(&run, &values).unwrap();
1234
1235        let expected = [
1236            true, true, true, false, false, false, true, true, true, false, false, false,
1237        ];
1238
1239        let n = array.logical_nulls().unwrap();
1240        assert_eq!(n.null_count(), 6);
1241
1242        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1243        for (offset, length) in slices {
1244            let a = array.slice(offset, length);
1245            let n = a.logical_nulls().unwrap();
1246            let n = n.into_iter().collect::<Vec<_>>();
1247            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1248        }
1249    }
1250
1251    #[test]
1252    fn test_run_array_eq_identical() {
1253        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1254        let values1 = StringArray::from(vec!["a", "b", "c"]);
1255        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1256
1257        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1258        let values2 = StringArray::from(vec!["a", "b", "c"]);
1259        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1260
1261        assert_eq!(array1, array2);
1262    }
1263
1264    #[test]
1265    fn test_run_array_ne_different_run_ends() {
1266        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1267        let values1 = StringArray::from(vec!["a", "b", "c"]);
1268        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1269
1270        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1271        let values2 = StringArray::from(vec!["a", "b", "c"]);
1272        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1273
1274        assert_ne!(array1, array2);
1275    }
1276
1277    #[test]
1278    fn test_run_array_ne_different_values() {
1279        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1280        let values1 = StringArray::from(vec!["a", "b", "c"]);
1281        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1282
1283        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1284        let values2 = StringArray::from(vec!["a", "b", "d"]);
1285        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1286
1287        assert_ne!(array1, array2);
1288    }
1289
1290    #[test]
1291    fn test_run_array_eq_with_nulls() {
1292        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1293        let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1294        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1295
1296        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1297        let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1298        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1299
1300        assert_eq!(array1, array2);
1301    }
1302
1303    #[test]
1304    fn test_run_array_eq_different_run_end_types() {
1305        let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1306        let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1307        let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1308
1309        let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1310        let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1311        let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1312
1313        assert_eq!(array_i16_1, array_i16_2);
1314    }
1315
1316    #[test]
1317    fn test_run_array_values_slice() {
1318        // 0, 0, 1, 1, 1, 2...2 (15 2s)
1319        let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1320        let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1321        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1322
1323        let slice = array.slice(1, 4); // 0 | 1, 1, 1 |
1324        // logical indices: 1, 2, 3, 4
1325        // physical indices: 0, 1, 1, 1
1326        // values at 0 is 0
1327        // values at 1 is 1
1328        // values slice should be [0, 1]
1329        assert_eq!(slice.get_start_physical_index(), 0);
1330        assert_eq!(slice.get_end_physical_index(), 1);
1331
1332        let values_slice = slice.values_slice();
1333        let values_slice = values_slice.as_primitive::<Int32Type>();
1334        assert_eq!(values_slice.values(), &[0, 1]);
1335
1336        let slice2 = array.slice(2, 3); // 1, 1, 1
1337        // logical indices: 2, 3, 4
1338        // physical indices: 1, 1, 1
1339        assert_eq!(slice2.get_start_physical_index(), 1);
1340        assert_eq!(slice2.get_end_physical_index(), 1);
1341
1342        let values_slice2 = slice2.values_slice();
1343        let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1344        assert_eq!(values_slice2.values(), &[1]);
1345    }
1346
1347    #[test]
1348    fn test_run_array_values_slice_empty() {
1349        let run_ends = Int32Array::from(vec![2, 5, 10]);
1350        let values = StringArray::from(vec!["a", "b", "c"]);
1351        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1352
1353        let slice = array.slice(0, 0);
1354        assert_eq!(slice.len(), 0);
1355
1356        let values_slice = slice.values_slice();
1357        assert_eq!(values_slice.len(), 0);
1358        assert_eq!(values_slice.data_type(), &DataType::Utf8);
1359    }
1360
1361    #[test]
1362    fn test_run_array_eq_empty() {
1363        let run_ends = Int32Array::from(vec![2, 5, 10]);
1364        let values = StringArray::from(vec!["a", "b", "c"]);
1365        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1366
1367        let slice1 = array.slice(0, 0);
1368        let slice2 = array.slice(1, 0);
1369        let slice3 = array.slice(10, 0);
1370
1371        assert_eq!(slice1, slice2);
1372        assert_eq!(slice2, slice3);
1373
1374        let empty_array = new_empty_array(array.data_type());
1375        let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1376
1377        assert_eq!(&slice1, empty_array);
1378    }
1379
1380    #[test]
1381    fn test_run_array_eq_diff_physical_same_logical() {
1382        let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1383        let values1 = StringArray::from(vec!["a", "b", "c"]);
1384        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1385
1386        let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1387        let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1388        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1389
1390        assert_eq!(array1, array2);
1391    }
1392
1393    #[test]
1394    fn test_run_array_eq_sliced() {
1395        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1396        let values1 = StringArray::from(vec!["a", "b", "c"]);
1397        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1398        // Logical: a, a, b, b, b, c, c, c, c, c
1399
1400        let slice1 = array1.slice(1, 6);
1401        // Logical: a, b, b, b, c, c
1402
1403        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1404        let values2 = StringArray::from(vec!["a", "b", "c"]);
1405        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1406        // Logical: a, b, b, b, c, c
1407
1408        assert_eq!(slice1, array2);
1409
1410        let slice2 = array1.slice(2, 3);
1411        // Logical: b, b, b
1412        let run_ends3 = Int32Array::from(vec![3]);
1413        let values3 = StringArray::from(vec!["b"]);
1414        let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1415        assert_eq!(slice2, array3);
1416    }
1417
1418    #[test]
1419    fn test_run_array_eq_sliced_different_offsets() {
1420        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1421        let values1 = StringArray::from(vec!["a", "b", "c"]);
1422        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1423        let array2 = array1.clone();
1424        assert_eq!(array1, array2);
1425
1426        let slice1 = array1.slice(1, 4); // a, b, b, b
1427        let slice2 = array1.slice(1, 4);
1428        assert_eq!(slice1, slice2);
1429
1430        let slice3 = array1.slice(0, 4); // a, a, b, b
1431        assert_ne!(slice1, slice3);
1432    }
1433
1434    #[test]
1435    #[cfg(not(feature = "force_validate"))]
1436    fn allow_to_create_invalid_array_using_new_unchecked() {
1437        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1438        let (_, buffer, values) = valid.into_parts();
1439
1440        let _ = unsafe {
1441            // mismatch data type
1442            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1443        };
1444    }
1445
1446    #[test]
1447    #[should_panic(
1448        expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
1449    )]
1450    #[cfg(feature = "force_validate")]
1451    fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
1452     {
1453        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1454        let (_, buffer, values) = valid.into_parts();
1455
1456        let _ = unsafe {
1457            // mismatch data type
1458            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1459        };
1460    }
1461
1462    #[test]
1463    fn test_run_array_roundtrip() {
1464        let run = Int32Array::from(vec![3, 6, 9, 12]);
1465        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1466        let array = RunArray::try_new(&run, &values).unwrap();
1467
1468        let (dt, buffer, values) = array.clone().into_parts();
1469        let created_from_parts =
1470            unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
1471        assert_eq!(array, created_from_parts);
1472    }
1473}