datafusion_functions_nested/
reverse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_reverse function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
23    GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
24};
25use arrow::buffer::{OffsetBuffer, ScalarBuffer};
26use arrow::compute::take;
27use arrow::datatypes::DataType::{
28    FixedSizeList, LargeList, LargeListView, List, ListView, Null,
29};
30use arrow::datatypes::{DataType, FieldRef};
31use datafusion_common::cast::{
32    as_fixed_size_list_array, as_large_list_array, as_large_list_view_array,
33    as_list_array, as_list_view_array,
34};
35use datafusion_common::{exec_err, utils::take_function_args, Result};
36use datafusion_expr::{
37    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
38};
39use datafusion_macros::user_doc;
40use itertools::Itertools;
41use std::any::Any;
42use std::sync::Arc;
43
44make_udf_expr_and_func!(
45    ArrayReverse,
46    array_reverse,
47    array,
48    "reverses the order of elements in the array.",
49    array_reverse_udf
50);
51
52#[user_doc(
53    doc_section(label = "Array Functions"),
54    description = "Returns the array with the order of the elements reversed.",
55    syntax_example = "array_reverse(array)",
56    sql_example = r#"```sql
57> select array_reverse([1, 2, 3, 4]);
58+------------------------------------------------------------+
59| array_reverse(List([1, 2, 3, 4]))                          |
60+------------------------------------------------------------+
61| [4, 3, 2, 1]                                               |
62+------------------------------------------------------------+
63```"#,
64    argument(
65        name = "array",
66        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
67    )
68)]
69#[derive(Debug, PartialEq, Eq, Hash)]
70pub struct ArrayReverse {
71    signature: Signature,
72    aliases: Vec<String>,
73}
74
75impl Default for ArrayReverse {
76    fn default() -> Self {
77        Self::new()
78    }
79}
80
81impl ArrayReverse {
82    pub fn new() -> Self {
83        Self {
84            signature: Signature::array(Volatility::Immutable),
85            aliases: vec!["list_reverse".to_string()],
86        }
87    }
88}
89
90impl ScalarUDFImpl for ArrayReverse {
91    fn as_any(&self) -> &dyn Any {
92        self
93    }
94
95    fn name(&self) -> &str {
96        "array_reverse"
97    }
98
99    fn signature(&self) -> &Signature {
100        &self.signature
101    }
102
103    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
104        Ok(arg_types[0].clone())
105    }
106
107    fn invoke_with_args(
108        &self,
109        args: datafusion_expr::ScalarFunctionArgs,
110    ) -> Result<ColumnarValue> {
111        make_scalar_function(array_reverse_inner)(&args.args)
112    }
113
114    fn aliases(&self) -> &[String] {
115        &self.aliases
116    }
117
118    fn documentation(&self) -> Option<&Documentation> {
119        self.doc()
120    }
121}
122
123/// array_reverse SQL function
124pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
125    let [input_array] = take_function_args("array_reverse", arg)?;
126
127    match &input_array.data_type() {
128        List(field) => {
129            let array = as_list_array(input_array)?;
130            general_array_reverse::<i32>(array, field)
131        }
132        LargeList(field) => {
133            let array = as_large_list_array(input_array)?;
134            general_array_reverse::<i64>(array, field)
135        }
136        FixedSizeList(field, _) => {
137            let array = as_fixed_size_list_array(input_array)?;
138            fixed_size_array_reverse(array, field)
139        }
140        Null => Ok(Arc::clone(input_array)),
141        ListView(field) => {
142            let array = as_list_view_array(input_array)?;
143            list_view_reverse::<i32>(array, field)
144        }
145        LargeListView(field) => {
146            let array = as_large_list_view_array(input_array)?;
147            list_view_reverse::<i64>(array, field)
148        }
149        array_type => exec_err!("array_reverse does not support type '{array_type}'."),
150    }
151}
152
153fn general_array_reverse<O: OffsetSizeTrait>(
154    array: &GenericListArray<O>,
155    field: &FieldRef,
156) -> Result<ArrayRef> {
157    let values = array.values();
158    let original_data = values.to_data();
159    let capacity = Capacities::Array(original_data.len());
160    let mut offsets = vec![O::usize_as(0)];
161    let mut mutable =
162        MutableArrayData::with_capacities(vec![&original_data], false, capacity);
163
164    for (row_index, (&start, &end)) in array.offsets().iter().tuple_windows().enumerate()
165    {
166        // skip the null value
167        if array.is_null(row_index) {
168            offsets.push(offsets[row_index]);
169            continue;
170        }
171
172        let mut index = end - O::one();
173        while index >= start {
174            mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
175            index = index - O::one();
176        }
177        let size = end - start;
178        offsets.push(offsets[row_index] + size);
179    }
180
181    let data = mutable.freeze();
182    Ok(Arc::new(GenericListArray::<O>::try_new(
183        Arc::clone(field),
184        OffsetBuffer::<O>::new(offsets.into()),
185        arrow::array::make_array(data),
186        array.nulls().cloned(),
187    )?))
188}
189
190/// Reverses a list view array.
191///
192/// Construct indices, sizes and offsets for the reversed array by iterating over
193/// the list view array in the logical order, and reversing the order of the elements.
194/// We end up with a list view array where the elements are in order,
195/// even if the original array had elements out of order.
196fn list_view_reverse<O: OffsetSizeTrait>(
197    array: &GenericListViewArray<O>,
198    field: &FieldRef,
199) -> Result<ArrayRef> {
200    let offsets = array.offsets();
201    let values = array.values();
202    let sizes = array.sizes();
203
204    let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
205    let mut indices: Vec<O> = Vec::with_capacity(values.len());
206    let mut new_sizes = Vec::with_capacity(sizes.len());
207
208    let mut current_offset = O::zero();
209    for (row_index, offset) in offsets.iter().enumerate() {
210        new_offsets.push(current_offset);
211
212        // If this array is null, we set its size to 0 and continue
213        if array.is_null(row_index) {
214            new_sizes.push(O::zero());
215            continue;
216        }
217        let size = sizes[row_index];
218        new_sizes.push(size);
219
220        // Each array is located at [offset, offset + size), collect indices in the reverse order
221        let array_start = *offset;
222        let array_end = array_start + size;
223        let mut idx = array_end - O::one();
224        while idx >= array_start {
225            indices.push(idx);
226            idx = idx - O::one();
227        }
228
229        current_offset += size;
230    }
231
232    // Materialize values from underlying array with take
233    let indices_array: ArrayRef = if O::IS_LARGE {
234        Arc::new(arrow::array::UInt64Array::from(
235            indices
236                .iter()
237                .map(|i| i.as_usize() as u64)
238                .collect::<Vec<_>>(),
239        ))
240    } else {
241        Arc::new(UInt32Array::from(
242            indices
243                .iter()
244                .map(|i| i.as_usize() as u32)
245                .collect::<Vec<_>>(),
246        ))
247    };
248    let values_reversed = take(&values, &indices_array, None)?;
249
250    Ok(Arc::new(GenericListViewArray::<O>::try_new(
251        Arc::clone(field),
252        ScalarBuffer::from(new_offsets),
253        ScalarBuffer::from(new_sizes),
254        values_reversed,
255        array.nulls().cloned(),
256    )?))
257}
258
259fn fixed_size_array_reverse(
260    array: &FixedSizeListArray,
261    field: &FieldRef,
262) -> Result<ArrayRef> {
263    let values = array.values();
264    let original_data = values.to_data();
265    let capacity = Capacities::Array(original_data.len());
266    let mut mutable =
267        MutableArrayData::with_capacities(vec![&original_data], false, capacity);
268    let value_length = array.value_length() as usize;
269
270    for row_index in 0..array.len() {
271        // skip the null value
272        if array.is_null(row_index) {
273            mutable.extend(0, 0, value_length);
274            continue;
275        }
276        let start = row_index * value_length;
277        let end = start + value_length;
278        for idx in (start..end).rev() {
279            mutable.extend(0, idx, idx + 1);
280        }
281    }
282
283    let data = mutable.freeze();
284    Ok(Arc::new(FixedSizeListArray::try_new(
285        Arc::clone(field),
286        array.value_length(),
287        arrow::array::make_array(data),
288        array.nulls().cloned(),
289    )?))
290}
291
292#[cfg(test)]
293mod tests {
294    use crate::reverse::list_view_reverse;
295    use arrow::{
296        array::{
297            AsArray, GenericListViewArray, Int32Array, LargeListViewArray, ListViewArray,
298            OffsetSizeTrait,
299        },
300        buffer::{NullBuffer, ScalarBuffer},
301        datatypes::{DataType, Field, Int32Type},
302    };
303    use datafusion_common::Result;
304    use std::sync::Arc;
305
306    fn list_view_values<O: OffsetSizeTrait>(
307        array: &GenericListViewArray<O>,
308    ) -> Vec<Option<Vec<i32>>> {
309        array
310            .iter()
311            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
312            .collect()
313    }
314
315    #[test]
316    fn test_reverse_list_view() -> Result<()> {
317        let field = Arc::new(Field::new("a", DataType::Int32, false));
318        let offsets = ScalarBuffer::from(vec![0, 1, 6, 6]);
319        let sizes = ScalarBuffer::from(vec![1, 5, 0, 3]);
320        let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
321        let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
322        let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
323        let result = list_view_reverse(
324            &list_view,
325            &Arc::new(Field::new("test", DataType::Int32, true)),
326        )?;
327        let reversed = list_view_values(result.as_list_view::<i32>());
328        let expected = vec![
329            Some(vec![1]),
330            Some(vec![6, 5, 4, 3, 2]),
331            None,
332            Some(vec![9, 8, 7]),
333        ];
334        assert_eq!(expected, reversed);
335        Ok(())
336    }
337
338    #[test]
339    fn test_reverse_large_list_view() -> Result<()> {
340        let field = Arc::new(Field::new("a", DataType::Int32, false));
341        let offsets = ScalarBuffer::from(vec![0, 1, 6, 6]);
342        let sizes = ScalarBuffer::from(vec![1, 5, 0, 3]);
343        let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
344        let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
345        let list_view = LargeListViewArray::new(field, offsets, sizes, values, nulls);
346        let result = list_view_reverse(
347            &list_view,
348            &Arc::new(Field::new("test", DataType::Int32, true)),
349        )?;
350        let reversed = list_view_values(result.as_list_view::<i64>());
351        let expected = vec![
352            Some(vec![1]),
353            Some(vec![6, 5, 4, 3, 2]),
354            None,
355            Some(vec![9, 8, 7]),
356        ];
357        assert_eq!(expected, reversed);
358        Ok(())
359    }
360
361    #[test]
362    fn test_reverse_list_view_out_of_order() -> Result<()> {
363        let field = Arc::new(Field::new("a", DataType::Int32, false));
364        let offsets = ScalarBuffer::from(vec![6, 1, 6, 0]); // out of order
365        let sizes = ScalarBuffer::from(vec![3, 5, 0, 1]);
366        let values = Arc::new(Int32Array::from(vec![
367            1, // fourth array: offset 0, size 1
368            2, 3, 4, 5, 6, // second array: offset 1, size 5
369            // third array: offset 6, size 0 (and null)
370            7, 8, 9, // first array: offset 6, size 3
371        ]));
372        let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
373        let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
374        let result = list_view_reverse(
375            &list_view,
376            &Arc::new(Field::new("test", DataType::Int32, true)),
377        )?;
378        let reversed = list_view_values(result.as_list_view::<i32>());
379        let expected = vec![
380            Some(vec![9, 8, 7]),
381            Some(vec![6, 5, 4, 3, 2]),
382            None,
383            Some(vec![1]),
384        ];
385        assert_eq!(expected, reversed);
386        Ok(())
387    }
388
389    #[test]
390    fn test_reverse_list_view_with_nulls() -> Result<()> {
391        let field = Arc::new(Field::new("a", DataType::Int32, false));
392        let offsets = ScalarBuffer::from(vec![16, 1, 6, 0]); // out of order
393        let sizes = ScalarBuffer::from(vec![3, 5, 10, 1]);
394        let values = Arc::new(Int32Array::from(vec![
395            1, // fourth array: offset 0, size 1
396            2, 3, 4, 5, 6, // second array: offset 1, size 5
397            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third array: offset 6, size 10
398            7, 8, 9, // first array: offset 6, size 3
399        ]));
400        let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
401        let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
402        let result = list_view_reverse(
403            &list_view,
404            &Arc::new(Field::new("test", DataType::Int32, true)),
405        )?;
406        let reversed = list_view_values(result.as_list_view::<i32>());
407        let expected = vec![
408            Some(vec![9, 8, 7]),
409            Some(vec![6, 5, 4, 3, 2]),
410            None,
411            Some(vec![1]),
412        ];
413        assert_eq!(expected, reversed);
414        Ok(())
415    }
416
417    #[test]
418    fn test_reverse_list_view_empty() -> Result<()> {
419        let field = Arc::new(Field::new("a", DataType::Int32, false));
420        let offsets = ScalarBuffer::from(vec![]);
421        let sizes = ScalarBuffer::from(vec![]);
422        let empty_array: Vec<i32> = vec![];
423        let values = Arc::new(Int32Array::from(empty_array));
424        let nulls = None;
425        let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
426        let result = list_view_reverse(
427            &list_view,
428            &Arc::new(Field::new("test", DataType::Int32, true)),
429        )?;
430        let reversed = list_view_values(result.as_list_view::<i32>());
431        let expected: Vec<Option<Vec<i32>>> = vec![];
432        assert_eq!(expected, reversed);
433        Ok(())
434    }
435
436    #[test]
437    fn test_reverse_list_view_all_nulls() -> Result<()> {
438        let field = Arc::new(Field::new("a", DataType::Int32, false));
439        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3]);
440        let sizes = ScalarBuffer::from(vec![0, 1, 1, 1]);
441        let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
442        let nulls = Some(NullBuffer::from(vec![false, false, false, false]));
443        let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
444        let result = list_view_reverse(
445            &list_view,
446            &Arc::new(Field::new("test", DataType::Int32, true)),
447        )?;
448        let reversed = list_view_values(result.as_list_view::<i32>());
449        let expected: Vec<Option<Vec<i32>>> = vec![None, None, None, None];
450        assert_eq!(expected, reversed);
451        Ok(())
452    }
453}