arrow_array/array/
run_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27    builder::StringRunBuilder,
28    make_array,
29    run_iterator::RunArrayIter,
30    types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33/// An array of [run-end encoded values].
34///
35/// This encoding is variation on [run-length encoding (RLE)] and is good for representing
36/// data containing the same values repeated consecutively.
37///
38/// A [`RunArray`] consists of a `run_ends` buffer and a `values` array of equivalent
39/// lengths. The `run_ends` buffer stores the indexes at which the run ends. The
40/// `values` array stores the corresponding value of each run. The below example
41/// illustrates how a logical array is represented by a [`RunArray`]:
42///
43/// ```text
44/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
45///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
46/// │ │        A        │  │    2    │ │     │        A        │
47///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
48/// │ │        D        │  │    3    │ │     │        A        │    run length of 'A' = runs_ends[0] - 0 = 2
49///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
50/// │ │        B        │  │    6    │ │     │        D        │    run length of 'D' = run_ends[1] - run_ends[0] = 1
51///   └─────────────────┘  └─────────┘       ├─────────────────┤
52/// │        values          run_ends  │     │        B        │
53///                                          ├─────────────────┤
54/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │
55///                                          ├─────────────────┤
56///                RunArray                  │        B        │    run length of 'B' = run_ends[2] - run_ends[1] = 3
57///               length = 3                 └─────────────────┘
58///
59///                                             Logical array
60///                                                Contents
61/// ```
62///
63/// [run-end encoded values]: https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
64/// [run-length encoding (RLE)]: https://en.wikipedia.org/wiki/Run-length_encoding
65pub struct RunArray<R: RunEndIndexType> {
66    data_type: DataType,
67    run_ends: RunEndBuffer<R::Native>,
68    values: ArrayRef,
69}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72    fn clone(&self) -> Self {
73        Self {
74            data_type: self.data_type.clone(),
75            run_ends: self.run_ends.clone(),
76            values: self.values.clone(),
77        }
78    }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82    /// Calculates the logical length of the array encoded by treating the `run_ends`
83    /// array as if it were a [`RunEndBuffer`].
84    pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85        let len = run_ends.len();
86        if len == 0 {
87            return 0;
88        }
89        run_ends.value(len - 1).as_usize()
90    }
91
92    /// Attempts to create a [`RunArray`] using the given `run_ends` and `values`.
93    ///
94    /// # Errors
95    ///
96    /// - If `run_ends` and `values` have different lengths
97    /// - If `run_ends` has any null values
98    /// - If `run_ends` doesn't consist of strictly increasing positive integers
99    pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100        let run_ends_type = run_ends.data_type().clone();
101        let values_type = values.data_type().clone();
102        let ree_array_type = DataType::RunEndEncoded(
103            Arc::new(Field::new("run_ends", run_ends_type, false)),
104            Arc::new(Field::new("values", values_type, true)),
105        );
106        let len = RunArray::logical_len(run_ends);
107        let builder = ArrayDataBuilder::new(ree_array_type)
108            .len(len)
109            .add_child_data(run_ends.to_data())
110            .add_child_data(values.to_data());
111
112        // `build_unchecked` is used to avoid recursive validation of child arrays.
113        let array_data = unsafe { builder.build_unchecked() };
114
115        // Safety: `validate_data` checks below
116        //    1. The given array data has exactly two child arrays.
117        //    2. The first child array (run_ends) has valid data type.
118        //    3. run_ends array does not have null values
119        //    4. run_ends array has non-zero and strictly increasing values.
120        //    5. The length of run_ends array and values array are the same.
121        array_data.validate_data()?;
122
123        Ok(array_data.into())
124    }
125
126    /// Create a new [`RunArray`] from the provided parts, without validation
127    ///
128    /// # Safety
129    ///
130    /// Safe if [`Self::try_new`] would not error
131    pub unsafe fn new_unchecked(
132        data_type: DataType,
133        run_ends: RunEndBuffer<R::Native>,
134        values: ArrayRef,
135    ) -> Self {
136        if cfg!(feature = "force_validate") {
137            match &data_type {
138                DataType::RunEndEncoded(run_ends, values_field) => {
139                    assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
140                    assert_eq!(
141                        run_ends.data_type(),
142                        &R::DATA_TYPE,
143                        "Incorrect run ends type"
144                    );
145                    assert_eq!(
146                        values_field.data_type(),
147                        values.data_type(),
148                        "Incorrect values type"
149                    );
150                }
151                _ => {
152                    panic!(
153                        "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
154                    );
155                }
156            }
157
158            let run_array = Self {
159                data_type,
160                run_ends,
161                values,
162            };
163
164            // Safety: `validate_data` checks below
165            //    1. The given array data has exactly two child arrays.
166            //    2. The first child array (run_ends) has valid data type.
167            //    3. run_ends array does not have null values
168            //    4. run_ends array has non-zero and strictly increasing values.
169            //    5. The length of run_ends array and values array are the same.
170            run_array
171                .to_data()
172                .validate_data()
173                .expect("RunArray data should be valid");
174
175            return run_array;
176        }
177
178        Self {
179            data_type,
180            run_ends,
181            values,
182        }
183    }
184
185    /// Deconstruct this array into its constituent parts
186    pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
187        (self.data_type, self.run_ends, self.values)
188    }
189
190    /// Returns a reference to the [`RunEndBuffer`].
191    pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
192        &self.run_ends
193    }
194
195    /// Returns a reference to the values array.
196    ///
197    /// Any slicing of this [`RunArray`] array is **not** applied to the returned
198    /// values here and must be handled separately.
199    pub fn values(&self) -> &ArrayRef {
200        &self.values
201    }
202
203    /// Similar to [`values`] but accounts for logical slicing, returning only the values
204    /// that are part of the logical slice of this array.
205    ///
206    /// [`values`]: Self::values
207    pub fn values_slice(&self) -> ArrayRef {
208        if self.is_empty() {
209            return self.values.slice(0, 0);
210        }
211        let start = self.get_start_physical_index();
212        let end = self.get_end_physical_index();
213        self.values.slice(start, end - start + 1)
214    }
215
216    /// Returns the physical index at which the array slice starts.
217    ///
218    /// See [`RunEndBuffer::get_start_physical_index`].
219    pub fn get_start_physical_index(&self) -> usize {
220        self.run_ends.get_start_physical_index()
221    }
222
223    /// Returns the physical index at which the array slice ends.
224    ///
225    /// See [`RunEndBuffer::get_end_physical_index`].
226    pub fn get_end_physical_index(&self) -> usize {
227        self.run_ends.get_end_physical_index()
228    }
229
230    /// Downcast this [`RunArray`] to a [`TypedRunArray`]
231    ///
232    /// ```
233    /// use arrow_array::{Array, ArrayAccessor, RunArray, StringArray, types::Int32Type};
234    ///
235    /// let orig = [Some("a"), Some("b"), None];
236    /// let run_array = RunArray::<Int32Type>::from_iter(orig);
237    /// let typed = run_array.downcast::<StringArray>().unwrap();
238    /// assert_eq!(typed.value(0), "a");
239    /// assert_eq!(typed.value(1), "b");
240    /// assert!(typed.values().is_null(2));
241    /// ```
242    pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
243        let values = self.values.as_any().downcast_ref()?;
244        Some(TypedRunArray {
245            run_array: self,
246            values,
247        })
248    }
249
250    /// Calls [`RunEndBuffer::get_physical_index`].
251    ///
252    /// The result is arbitrary if `logical_index >= self.len()`
253    pub fn get_physical_index(&self, logical_index: usize) -> usize {
254        self.run_ends.get_physical_index(logical_index)
255    }
256
257    /// Returns the physical indices corresponding to the provided logical indices.
258    ///
259    /// See [`RunEndBuffer::get_physical_indices`] for more details.
260    #[inline]
261    pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
262    where
263        I: ArrowNativeType,
264    {
265        self.run_ends()
266            .get_physical_indices(logical_indices)
267            .map_err(|index| {
268                ArrowError::InvalidArgumentError(format!(
269                    "Logical index {} is out of bounds for RunArray of length {}",
270                    index.as_usize(),
271                    self.len()
272                ))
273            })
274    }
275
276    /// Returns a zero-copy slice of this array with the indicated offset and length.
277    ///
278    /// # Panics
279    ///
280    /// - Specified slice (`offset` + `length`) exceeds existing length
281    pub fn slice(&self, offset: usize, length: usize) -> Self {
282        Self {
283            data_type: self.data_type.clone(),
284            run_ends: self.run_ends.slice(offset, length),
285            values: self.values.clone(),
286        }
287    }
288}
289
290impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
291    // The method assumes the caller already validated the data using `ArrayData::validate_data()`
292    fn from(data: ArrayData) -> Self {
293        let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
294
295        match &data_type {
296            DataType::RunEndEncoded(_, _) => {}
297            _ => {
298                panic!(
299                    "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
300                );
301            }
302        }
303
304        let [run_end_child, values_child]: [ArrayData; 2] = child_data
305            .try_into()
306            .expect("RunArray data should have exactly two child arrays");
307
308        // deconstruct the run ends child array
309        let (
310            run_end_data_type,
311            _run_end_len,
312            _run_end_nulls,
313            _run_end_offset,
314            run_end_buffers,
315            _run_end_child_data,
316        ) = run_end_child.into_parts();
317        assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
318        let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
319            .try_into()
320            .expect("Run ends should have exactly one buffer");
321        let scalar = ScalarBuffer::from(run_end_buffer);
322        let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
323
324        let values = make_array(values_child);
325
326        Self {
327            data_type,
328            run_ends,
329            values,
330        }
331    }
332}
333
334impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
335    fn from(array: RunArray<R>) -> Self {
336        let len = array.run_ends.len();
337        let offset = array.run_ends.offset();
338
339        let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
340            .len(array.run_ends.values().len())
341            .buffers(vec![array.run_ends.into_inner().into_inner()]);
342
343        let run_ends = unsafe { run_ends.build_unchecked() };
344
345        let builder = ArrayDataBuilder::new(array.data_type)
346            .len(len)
347            .offset(offset)
348            .child_data(vec![run_ends, array.values.to_data()]);
349
350        unsafe { builder.build_unchecked() }
351    }
352}
353
354/// SAFETY: Correctly implements the contract of Arrow Arrays
355unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
356    fn as_any(&self) -> &dyn Any {
357        self
358    }
359
360    fn to_data(&self) -> ArrayData {
361        self.clone().into()
362    }
363
364    fn into_data(self) -> ArrayData {
365        self.into()
366    }
367
368    fn data_type(&self) -> &DataType {
369        &self.data_type
370    }
371
372    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
373        Arc::new(self.slice(offset, length))
374    }
375
376    fn len(&self) -> usize {
377        self.run_ends.len()
378    }
379
380    fn is_empty(&self) -> bool {
381        self.run_ends.is_empty()
382    }
383
384    fn shrink_to_fit(&mut self) {
385        self.run_ends.shrink_to_fit();
386        self.values.shrink_to_fit();
387    }
388
389    fn offset(&self) -> usize {
390        self.run_ends.offset()
391    }
392
393    fn nulls(&self) -> Option<&NullBuffer> {
394        None
395    }
396
397    fn logical_nulls(&self) -> Option<NullBuffer> {
398        let len = self.len();
399        let nulls = self.values.logical_nulls()?;
400        let mut out = BooleanBufferBuilder::new(len);
401        let offset = self.run_ends.offset();
402        let mut valid_start = 0;
403        let mut last_end = 0;
404        for (idx, end) in self.run_ends.values().iter().enumerate() {
405            let end = end.as_usize();
406            if end < offset {
407                continue;
408            }
409            let end = (end - offset).min(len);
410            if nulls.is_null(idx) {
411                if valid_start < last_end {
412                    out.append_n(last_end - valid_start, true);
413                }
414                out.append_n(end - last_end, false);
415                valid_start = end;
416            }
417            last_end = end;
418            if end == len {
419                break;
420            }
421        }
422        if valid_start < len {
423            out.append_n(len - valid_start, true)
424        }
425        // Sanity check
426        assert_eq!(out.len(), len);
427        Some(out.finish().into())
428    }
429
430    fn is_nullable(&self) -> bool {
431        !self.is_empty() && self.values.is_nullable()
432    }
433
434    fn get_buffer_memory_size(&self) -> usize {
435        self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
436    }
437
438    fn get_array_memory_size(&self) -> usize {
439        std::mem::size_of::<Self>()
440            + self.run_ends.inner().inner().capacity()
441            + self.values.get_array_memory_size()
442    }
443
444    #[cfg(feature = "pool")]
445    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
446        self.run_ends.claim(pool);
447        self.values.claim(pool);
448    }
449}
450
451impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
452    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
453        writeln!(
454            f,
455            "RunArray {{run_ends: {:?}, values: {:?}}}",
456            self.run_ends.values(),
457            self.values
458        )
459    }
460}
461
462/// Constructs a `RunArray` from an iterator of optional strings.
463///
464/// # Example:
465/// ```
466/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
467///
468/// let test = vec!["a", "a", "b", "c", "c"];
469/// let array: RunArray<Int16Type> = test
470///     .iter()
471///     .map(|&x| if x == "b" { None } else { Some(x) })
472///     .collect();
473/// assert_eq!(
474///     "RunArray {run_ends: [2, 3, 5], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
475///     format!("{:?}", array)
476/// );
477/// ```
478impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
479    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
480        let it = iter.into_iter();
481        let (lower, _) = it.size_hint();
482        let mut builder = StringRunBuilder::with_capacity(lower, 256);
483        it.for_each(|i| {
484            builder.append_option(i);
485        });
486
487        builder.finish()
488    }
489}
490
491/// Constructs a `RunArray` from an iterator of strings.
492///
493/// # Example:
494///
495/// ```
496/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
497///
498/// let test = vec!["a", "a", "b", "c"];
499/// let array: RunArray<Int16Type> = test.into_iter().collect();
500/// assert_eq!(
501///     "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
502///     format!("{:?}", array)
503/// );
504/// ```
505impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
506    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
507        let it = iter.into_iter();
508        let (lower, _) = it.size_hint();
509        let mut builder = StringRunBuilder::with_capacity(lower, 256);
510        it.for_each(|i| {
511            builder.append_value(i);
512        });
513
514        builder.finish()
515    }
516}
517
518///
519/// A [`RunArray`] with `i16` run ends
520///
521/// # Example: Using `collect`
522/// ```
523/// # use arrow_array::{Array, Int16RunArray, Int16Array, StringArray};
524/// # use std::sync::Arc;
525///
526/// let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
527/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
528/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
529/// assert_eq!(array.values(), &values);
530/// ```
531pub type Int16RunArray = RunArray<Int16Type>;
532
533///
534/// A [`RunArray`] with `i32` run ends
535///
536/// # Example: Using `collect`
537/// ```
538/// # use arrow_array::{Array, Int32RunArray, Int32Array, StringArray};
539/// # use std::sync::Arc;
540///
541/// let array: Int32RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
542/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
543/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
544/// assert_eq!(array.values(), &values);
545/// ```
546pub type Int32RunArray = RunArray<Int32Type>;
547
548///
549/// A [`RunArray`] with `i64` run ends
550///
551/// # Example: Using `collect`
552/// ```
553/// # use arrow_array::{Array, Int64RunArray, Int64Array, StringArray};
554/// # use std::sync::Arc;
555///
556/// let array: Int64RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
557/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
558/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
559/// assert_eq!(array.values(), &values);
560/// ```
561pub type Int64RunArray = RunArray<Int64Type>;
562
563/// A [`RunArray`] typed typed on its child values array
564///
565/// Implements [`ArrayAccessor`] and [`IntoIterator`] allowing fast access to its elements
566///
567/// ```
568/// use arrow_array::{RunArray, StringArray, types::Int32Type};
569///
570/// let orig = ["a", "b", "a", "b"];
571/// let ree_array = RunArray::<Int32Type>::from_iter(orig);
572///
573/// // `TypedRunArray` allows you to access the values directly
574/// let typed = ree_array.downcast::<StringArray>().unwrap();
575///
576/// for (maybe_val, orig) in typed.into_iter().zip(orig) {
577///     assert_eq!(maybe_val.unwrap(), orig)
578/// }
579/// ```
580pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
581    /// The run array
582    run_array: &'a RunArray<R>,
583
584    /// The values of the run_array
585    values: &'a V,
586}
587
588// Manually implement `Clone` to avoid `V: Clone` type constraint
589impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
590    fn clone(&self) -> Self {
591        *self
592    }
593}
594
595impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
596
597impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
598    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
599        writeln!(f, "TypedRunArray({:?})", self.run_array)
600    }
601}
602
603impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
604    /// Returns the run_ends of this [`TypedRunArray`]
605    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
606        self.run_array.run_ends()
607    }
608
609    /// Returns the values of this [`TypedRunArray`]
610    pub fn values(&self) -> &'a V {
611        self.values
612    }
613
614    /// Returns the run array of this [`TypedRunArray`]
615    pub fn run_array(&self) -> &'a RunArray<R> {
616        self.run_array
617    }
618}
619
620/// SAFETY: Correctly implements the contract of Arrow Arrays
621unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
622    fn as_any(&self) -> &dyn Any {
623        self.run_array
624    }
625
626    fn to_data(&self) -> ArrayData {
627        self.run_array.to_data()
628    }
629
630    fn into_data(self) -> ArrayData {
631        self.run_array.into_data()
632    }
633
634    fn data_type(&self) -> &DataType {
635        self.run_array.data_type()
636    }
637
638    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
639        Arc::new(self.run_array.slice(offset, length))
640    }
641
642    fn len(&self) -> usize {
643        self.run_array.len()
644    }
645
646    fn is_empty(&self) -> bool {
647        self.run_array.is_empty()
648    }
649
650    fn offset(&self) -> usize {
651        self.run_array.offset()
652    }
653
654    fn nulls(&self) -> Option<&NullBuffer> {
655        self.run_array.nulls()
656    }
657
658    fn logical_nulls(&self) -> Option<NullBuffer> {
659        self.run_array.logical_nulls()
660    }
661
662    fn logical_null_count(&self) -> usize {
663        self.run_array.logical_null_count()
664    }
665
666    fn is_nullable(&self) -> bool {
667        self.run_array.is_nullable()
668    }
669
670    fn get_buffer_memory_size(&self) -> usize {
671        self.run_array.get_buffer_memory_size()
672    }
673
674    fn get_array_memory_size(&self) -> usize {
675        self.run_array.get_array_memory_size()
676    }
677
678    #[cfg(feature = "pool")]
679    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
680        self.run_array.claim(pool);
681    }
682}
683
684// Array accessor converts the index of logical array to the index of the physical array
685// using binary search. The time complexity is O(log N) where N is number of runs.
686impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
687where
688    R: RunEndIndexType,
689    V: Sync + Send,
690    &'a V: ArrayAccessor,
691    <&'a V as ArrayAccessor>::Item: Default,
692{
693    type Item = <&'a V as ArrayAccessor>::Item;
694
695    fn value(&self, logical_index: usize) -> Self::Item {
696        assert!(
697            logical_index < self.len(),
698            "Trying to access an element at index {} from a TypedRunArray of length {}",
699            logical_index,
700            self.len()
701        );
702        unsafe { self.value_unchecked(logical_index) }
703    }
704
705    unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
706        let physical_index = self.run_array.get_physical_index(logical_index);
707        unsafe { self.values().value_unchecked(physical_index) }
708    }
709}
710
711impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
712where
713    R: RunEndIndexType,
714    V: Sync + Send,
715    &'a V: ArrayAccessor,
716    <&'a V as ArrayAccessor>::Item: Default,
717{
718    type Item = Option<<&'a V as ArrayAccessor>::Item>;
719    type IntoIter = RunArrayIter<'a, R, V>;
720
721    fn into_iter(self) -> Self::IntoIter {
722        RunArrayIter::new(self)
723    }
724}
725
726#[cfg(test)]
727mod tests {
728    use rand::Rng;
729    use rand::rng;
730    use rand::seq::SliceRandom;
731
732    use super::*;
733    use crate::builder::PrimitiveRunBuilder;
734    use crate::cast::AsArray;
735    use crate::new_empty_array;
736    use crate::types::{Int8Type, UInt32Type};
737    use crate::{Int16Array, Int32Array, StringArray};
738
739    fn build_input_array(size: usize) -> Vec<Option<i32>> {
740        // The input array is created by shuffling and repeating
741        // the seed values random number of times.
742        let mut seed: Vec<Option<i32>> = vec![
743            None,
744            None,
745            None,
746            Some(1),
747            Some(2),
748            Some(3),
749            Some(4),
750            Some(5),
751            Some(6),
752            Some(7),
753            Some(8),
754            Some(9),
755        ];
756        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
757        let mut ix = 0;
758        let mut rng = rng();
759        // run length can go up to 8. Cap the max run length for smaller arrays to size / 2.
760        let max_run_length = 8_usize.min(1_usize.max(size / 2));
761        while result.len() < size {
762            // shuffle the seed array if all the values are iterated.
763            if ix == 0 {
764                seed.shuffle(&mut rng);
765            }
766            // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
767            let num = max_run_length.min(rng.random_range(1..=max_run_length));
768            for _ in 0..num {
769                result.push(seed[ix]);
770            }
771            ix += 1;
772            if ix == seed.len() {
773                ix = 0
774            }
775        }
776        result.resize(size, None);
777        result
778    }
779
780    // Asserts that `logical_array[logical_indices[*]] == physical_array[physical_indices[*]]`
781    fn compare_logical_and_physical_indices(
782        logical_indices: &[u32],
783        logical_array: &[Option<i32>],
784        physical_indices: &[usize],
785        physical_array: &PrimitiveArray<Int32Type>,
786    ) {
787        assert_eq!(logical_indices.len(), physical_indices.len());
788
789        // check value in logical index in the logical_array matches physical index in physical_array
790        logical_indices
791            .iter()
792            .map(|f| f.as_usize())
793            .zip(physical_indices.iter())
794            .for_each(|(logical_ix, physical_ix)| {
795                let expected = logical_array[logical_ix];
796                match expected {
797                    Some(val) => {
798                        assert!(physical_array.is_valid(*physical_ix));
799                        let actual = physical_array.value(*physical_ix);
800                        assert_eq!(val, actual);
801                    }
802                    None => {
803                        assert!(physical_array.is_null(*physical_ix))
804                    }
805                };
806            });
807    }
808    #[test]
809    fn test_run_array() {
810        // Construct a value array
811        let value_data =
812            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
813
814        // Construct a run_ends array:
815        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
816        let run_ends_data =
817            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
818
819        // Construct a run ends encoded array from the above two
820        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
821
822        assert_eq!(ree_array.len(), 22);
823        assert_eq!(ree_array.null_count(), 0);
824
825        let values = ree_array.values();
826        assert_eq!(value_data.into_data(), values.to_data());
827        assert_eq!(&DataType::Int8, values.data_type());
828
829        let run_ends = ree_array.run_ends();
830        assert_eq!(run_ends.values(), &run_ends_values);
831    }
832
833    #[test]
834    fn test_run_array_empty() {
835        let runs = new_empty_array(&DataType::Int16);
836        let runs = runs.as_primitive::<Int16Type>();
837        let values = new_empty_array(&DataType::Int64);
838        let array = RunArray::try_new(runs, &values).unwrap();
839
840        fn assertions(array: &RunArray<Int16Type>) {
841            assert!(array.is_empty());
842            assert_eq!(array.get_start_physical_index(), 0);
843            assert_eq!(array.get_end_physical_index(), 0);
844            assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
845            assert!(array.run_ends().is_empty());
846            assert_eq!(array.run_ends().sliced_values().count(), 0);
847        }
848
849        assertions(&array);
850        assertions(&array.slice(0, 0));
851    }
852
853    #[test]
854    fn test_run_array_fmt_debug() {
855        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
856        builder.append_value(12345678);
857        builder.append_null();
858        builder.append_value(22345678);
859        let array = builder.finish();
860        assert_eq!(
861            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  22345678,\n]}\n",
862            format!("{array:?}")
863        );
864
865        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
866        for _ in 0..20 {
867            builder.append_value(1);
868        }
869        let array = builder.finish();
870
871        assert_eq!(array.len(), 20);
872        assert_eq!(array.null_count(), 0);
873        assert_eq!(array.logical_null_count(), 0);
874
875        assert_eq!(
876            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
877            format!("{array:?}")
878        );
879    }
880
881    #[test]
882    fn test_run_array_from_iter() {
883        let test = vec!["a", "a", "b", "c"];
884        let array: RunArray<Int16Type> = test
885            .iter()
886            .map(|&x| if x == "b" { None } else { Some(x) })
887            .collect();
888        assert_eq!(
889            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
890            format!("{array:?}")
891        );
892
893        assert_eq!(array.len(), 4);
894        assert_eq!(array.null_count(), 0);
895        assert_eq!(array.logical_null_count(), 1);
896
897        let array: RunArray<Int16Type> = test.into_iter().collect();
898        assert_eq!(
899            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
900            format!("{array:?}")
901        );
902    }
903
904    #[test]
905    fn test_run_array_run_ends_as_primitive_array() {
906        let test = vec!["a", "b", "c", "a"];
907        let array: RunArray<Int16Type> = test.into_iter().collect();
908
909        assert_eq!(array.len(), 4);
910        assert_eq!(array.null_count(), 0);
911        assert_eq!(array.logical_null_count(), 0);
912
913        let run_ends = array.run_ends();
914        assert_eq!(&[1, 2, 3, 4], run_ends.values());
915    }
916
917    #[test]
918    fn test_run_array_as_primitive_array_with_null() {
919        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
920        let array: RunArray<Int32Type> = test.into_iter().collect();
921
922        assert_eq!(array.len(), 6);
923        assert_eq!(array.null_count(), 0);
924        assert_eq!(array.logical_null_count(), 3);
925
926        let run_ends = array.run_ends();
927        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
928
929        let values_data = array.values();
930        assert_eq!(2, values_data.null_count());
931        assert_eq!(5, values_data.len());
932    }
933
934    #[test]
935    fn test_run_array_all_nulls() {
936        let test = vec![None, None, None];
937        let array: RunArray<Int32Type> = test.into_iter().collect();
938
939        assert_eq!(array.len(), 3);
940        assert_eq!(array.null_count(), 0);
941        assert_eq!(array.logical_null_count(), 3);
942
943        let run_ends = array.run_ends();
944        assert_eq!(3, run_ends.len());
945        assert_eq!(&[3], run_ends.values());
946
947        let values_data = array.values();
948        assert_eq!(1, values_data.null_count());
949    }
950
951    #[test]
952    fn test_run_array_try_new() {
953        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
954            .into_iter()
955            .collect();
956        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
957
958        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
959        assert_eq!(array.values().data_type(), &DataType::Utf8);
960
961        assert_eq!(array.null_count(), 0);
962        assert_eq!(array.logical_null_count(), 1);
963        assert_eq!(array.len(), 4);
964        assert_eq!(array.values().null_count(), 1);
965
966        assert_eq!(
967            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n  \"foo\",\n  \"bar\",\n  null,\n  \"baz\",\n]}\n",
968            format!("{array:?}")
969        );
970    }
971
972    #[test]
973    fn test_run_array_int16_type_definition() {
974        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
975        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
976        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
977        assert_eq!(array.values(), &values);
978    }
979
980    #[test]
981    fn test_run_array_empty_string() {
982        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
983        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
984        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
985        assert_eq!(array.values(), &values);
986    }
987
988    #[test]
989    fn test_run_array_length_mismatch() {
990        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
991            .into_iter()
992            .collect();
993        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
994
995        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
996        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
997        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
998    }
999
1000    #[test]
1001    fn test_run_array_run_ends_with_null() {
1002        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1003            .into_iter()
1004            .collect();
1005        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
1006
1007        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1008        let expected = ArrowError::InvalidArgumentError(
1009            "Found null values in run_ends array. The run_ends array should not have null values."
1010                .to_string(),
1011        );
1012        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1013    }
1014
1015    #[test]
1016    fn test_run_array_run_ends_with_zeroes() {
1017        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1018            .into_iter()
1019            .collect();
1020        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
1021
1022        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1023        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
1024        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1025    }
1026
1027    #[test]
1028    fn test_run_array_run_ends_non_increasing() {
1029        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1030            .into_iter()
1031            .collect();
1032        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
1033
1034        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1035        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
1036        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1037    }
1038
1039    #[test]
1040    #[should_panic(expected = "Incorrect run ends type")]
1041    fn test_run_array_run_ends_data_type_mismatch() {
1042        let a = RunArray::<Int32Type>::from_iter(["32"]);
1043        let _ = RunArray::<Int64Type>::from(a.into_data());
1044    }
1045
1046    #[test]
1047    fn test_ree_array_accessor() {
1048        let input_array = build_input_array(256);
1049
1050        // Encode the input_array to ree_array
1051        let mut builder =
1052            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1053        builder.extend(input_array.iter().copied());
1054        let run_array = builder.finish();
1055        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
1056
1057        // Access every index and check if the value in the input array matches returned value.
1058        for (i, inp_val) in input_array.iter().enumerate() {
1059            if let Some(val) = inp_val {
1060                let actual = typed.value(i);
1061                assert_eq!(*val, actual)
1062            } else {
1063                let physical_ix = run_array.get_physical_index(i);
1064                assert!(typed.values().is_null(physical_ix));
1065            };
1066        }
1067    }
1068
1069    #[test]
1070    #[cfg_attr(miri, ignore)] // Takes too long
1071    fn test_get_physical_indices() {
1072        // Test for logical lengths starting from 10 to 250 increasing by 10
1073        for logical_len in (0..250).step_by(10) {
1074            let input_array = build_input_array(logical_len);
1075
1076            // create run array using input_array
1077            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1078            builder.extend(input_array.clone().into_iter());
1079
1080            let run_array = builder.finish();
1081            let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1082
1083            // create an array consisting of all the indices repeated twice and shuffled.
1084            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1085            // add same indices once more
1086            logical_indices.append(&mut logical_indices.clone());
1087            let mut rng = rng();
1088            logical_indices.shuffle(&mut rng);
1089
1090            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1091
1092            assert_eq!(logical_indices.len(), physical_indices.len());
1093
1094            // check value in logical index in the input_array matches physical index in typed_run_array
1095            compare_logical_and_physical_indices(
1096                &logical_indices,
1097                &input_array,
1098                &physical_indices,
1099                physical_values_array,
1100            );
1101        }
1102    }
1103
1104    #[test]
1105    #[cfg_attr(miri, ignore)] // Takes too long
1106    fn test_get_physical_indices_sliced() {
1107        let total_len = 80;
1108        let input_array = build_input_array(total_len);
1109
1110        // Encode the input_array to run array
1111        let mut builder =
1112            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1113        builder.extend(input_array.iter().copied());
1114        let run_array = builder.finish();
1115        let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1116
1117        // test for all slice lengths.
1118        for slice_len in 1..=total_len {
1119            // create an array consisting of all the indices repeated twice and shuffled.
1120            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1121            // add same indices once more
1122            logical_indices.append(&mut logical_indices.clone());
1123            let mut rng = rng();
1124            logical_indices.shuffle(&mut rng);
1125
1126            // test for offset = 0 and slice length = slice_len
1127            // slice the input array using which the run array was built.
1128            let sliced_input_array = &input_array[0..slice_len];
1129
1130            // slice the run array
1131            let sliced_run_array: RunArray<Int16Type> =
1132                run_array.slice(0, slice_len).into_data().into();
1133
1134            // Get physical indices.
1135            let physical_indices = sliced_run_array
1136                .get_physical_indices(&logical_indices)
1137                .unwrap();
1138
1139            compare_logical_and_physical_indices(
1140                &logical_indices,
1141                sliced_input_array,
1142                &physical_indices,
1143                physical_values_array,
1144            );
1145
1146            // test for offset = total_len - slice_len and slice length = slice_len
1147            // slice the input array using which the run array was built.
1148            let sliced_input_array = &input_array[total_len - slice_len..total_len];
1149
1150            // slice the run array
1151            let sliced_run_array: RunArray<Int16Type> = run_array
1152                .slice(total_len - slice_len, slice_len)
1153                .into_data()
1154                .into();
1155
1156            // Get physical indices
1157            let physical_indices = sliced_run_array
1158                .get_physical_indices(&logical_indices)
1159                .unwrap();
1160
1161            compare_logical_and_physical_indices(
1162                &logical_indices,
1163                sliced_input_array,
1164                &physical_indices,
1165                physical_values_array,
1166            );
1167        }
1168    }
1169
1170    #[test]
1171    fn test_logical_nulls() {
1172        let run = Int32Array::from(vec![3, 6, 9, 12]);
1173        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1174        let array = RunArray::try_new(&run, &values).unwrap();
1175
1176        let expected = [
1177            true, true, true, false, false, false, true, true, true, false, false, false,
1178        ];
1179
1180        let n = array.logical_nulls().unwrap();
1181        assert_eq!(n.null_count(), 6);
1182
1183        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1184        for (offset, length) in slices {
1185            let a = array.slice(offset, length);
1186            let n = a.logical_nulls().unwrap();
1187            let n = n.into_iter().collect::<Vec<_>>();
1188            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1189        }
1190    }
1191
1192    #[test]
1193    fn test_run_array_eq_identical() {
1194        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1195        let values1 = StringArray::from(vec!["a", "b", "c"]);
1196        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1197
1198        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1199        let values2 = StringArray::from(vec!["a", "b", "c"]);
1200        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1201
1202        assert_eq!(array1, array2);
1203    }
1204
1205    #[test]
1206    fn test_run_array_ne_different_run_ends() {
1207        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1208        let values1 = StringArray::from(vec!["a", "b", "c"]);
1209        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1210
1211        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1212        let values2 = StringArray::from(vec!["a", "b", "c"]);
1213        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1214
1215        assert_ne!(array1, array2);
1216    }
1217
1218    #[test]
1219    fn test_run_array_ne_different_values() {
1220        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1221        let values1 = StringArray::from(vec!["a", "b", "c"]);
1222        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1223
1224        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1225        let values2 = StringArray::from(vec!["a", "b", "d"]);
1226        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1227
1228        assert_ne!(array1, array2);
1229    }
1230
1231    #[test]
1232    fn test_run_array_eq_with_nulls() {
1233        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1234        let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1235        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1236
1237        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1238        let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1239        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1240
1241        assert_eq!(array1, array2);
1242    }
1243
1244    #[test]
1245    fn test_run_array_eq_different_run_end_types() {
1246        let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1247        let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1248        let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1249
1250        let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1251        let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1252        let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1253
1254        assert_eq!(array_i16_1, array_i16_2);
1255    }
1256
1257    #[test]
1258    fn test_run_array_values_slice() {
1259        // 0, 0, 1, 1, 1, 2...2 (15 2s)
1260        let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1261        let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1262        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1263
1264        let slice = array.slice(1, 4); // 0 | 1, 1, 1 |
1265        // logical indices: 1, 2, 3, 4
1266        // physical indices: 0, 1, 1, 1
1267        // values at 0 is 0
1268        // values at 1 is 1
1269        // values slice should be [0, 1]
1270        assert_eq!(slice.get_start_physical_index(), 0);
1271        assert_eq!(slice.get_end_physical_index(), 1);
1272
1273        let values_slice = slice.values_slice();
1274        let values_slice = values_slice.as_primitive::<Int32Type>();
1275        assert_eq!(values_slice.values(), &[0, 1]);
1276
1277        let slice2 = array.slice(2, 3); // 1, 1, 1
1278        // logical indices: 2, 3, 4
1279        // physical indices: 1, 1, 1
1280        assert_eq!(slice2.get_start_physical_index(), 1);
1281        assert_eq!(slice2.get_end_physical_index(), 1);
1282
1283        let values_slice2 = slice2.values_slice();
1284        let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1285        assert_eq!(values_slice2.values(), &[1]);
1286    }
1287
1288    #[test]
1289    fn test_run_array_values_slice_empty() {
1290        let run_ends = Int32Array::from(vec![2, 5, 10]);
1291        let values = StringArray::from(vec!["a", "b", "c"]);
1292        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1293
1294        let slice = array.slice(0, 0);
1295        assert_eq!(slice.len(), 0);
1296
1297        let values_slice = slice.values_slice();
1298        assert_eq!(values_slice.len(), 0);
1299        assert_eq!(values_slice.data_type(), &DataType::Utf8);
1300    }
1301
1302    #[test]
1303    fn test_run_array_eq_empty() {
1304        let run_ends = Int32Array::from(vec![2, 5, 10]);
1305        let values = StringArray::from(vec!["a", "b", "c"]);
1306        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1307
1308        let slice1 = array.slice(0, 0);
1309        let slice2 = array.slice(1, 0);
1310        let slice3 = array.slice(10, 0);
1311
1312        assert_eq!(slice1, slice2);
1313        assert_eq!(slice2, slice3);
1314
1315        let empty_array = new_empty_array(array.data_type());
1316        let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1317
1318        assert_eq!(&slice1, empty_array);
1319    }
1320
1321    #[test]
1322    fn test_run_array_eq_diff_physical_same_logical() {
1323        let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1324        let values1 = StringArray::from(vec!["a", "b", "c"]);
1325        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1326
1327        let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1328        let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1329        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1330
1331        assert_eq!(array1, array2);
1332    }
1333
1334    #[test]
1335    fn test_run_array_eq_sliced() {
1336        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1337        let values1 = StringArray::from(vec!["a", "b", "c"]);
1338        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1339        // Logical: a, a, b, b, b, c, c, c, c, c
1340
1341        let slice1 = array1.slice(1, 6);
1342        // Logical: a, b, b, b, c, c
1343
1344        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1345        let values2 = StringArray::from(vec!["a", "b", "c"]);
1346        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1347        // Logical: a, b, b, b, c, c
1348
1349        assert_eq!(slice1, array2);
1350
1351        let slice2 = array1.slice(2, 3);
1352        // Logical: b, b, b
1353        let run_ends3 = Int32Array::from(vec![3]);
1354        let values3 = StringArray::from(vec!["b"]);
1355        let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1356        assert_eq!(slice2, array3);
1357    }
1358
1359    #[test]
1360    fn test_run_array_eq_sliced_different_offsets() {
1361        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1362        let values1 = StringArray::from(vec!["a", "b", "c"]);
1363        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1364        let array2 = array1.clone();
1365        assert_eq!(array1, array2);
1366
1367        let slice1 = array1.slice(1, 4); // a, b, b, b
1368        let slice2 = array1.slice(1, 4);
1369        assert_eq!(slice1, slice2);
1370
1371        let slice3 = array1.slice(0, 4); // a, a, b, b
1372        assert_ne!(slice1, slice3);
1373    }
1374
1375    #[test]
1376    #[cfg(not(feature = "force_validate"))]
1377    fn allow_to_create_invalid_array_using_new_unchecked() {
1378        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1379        let (_, buffer, values) = valid.into_parts();
1380
1381        let _ = unsafe {
1382            // mismatch data type
1383            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1384        };
1385    }
1386
1387    #[test]
1388    #[should_panic(
1389        expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
1390    )]
1391    #[cfg(feature = "force_validate")]
1392    fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
1393     {
1394        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1395        let (_, buffer, values) = valid.into_parts();
1396
1397        let _ = unsafe {
1398            // mismatch data type
1399            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1400        };
1401    }
1402
1403    #[test]
1404    fn test_run_array_roundtrip() {
1405        let run = Int32Array::from(vec![3, 6, 9, 12]);
1406        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1407        let array = RunArray::try_new(&run, &values).unwrap();
1408
1409        let (dt, buffer, values) = array.clone().into_parts();
1410        let created_from_parts =
1411            unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
1412        assert_eq!(array, created_from_parts);
1413    }
1414}
arrow_array/array/run_array.rs

arrow_array/array/
run_array.rs