datafusion_common/utils/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module provides the bisect function, which implements binary search.
19
20pub mod expr;
21pub mod memory;
22pub mod proxy;
23pub mod string_utils;
24
25use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err};
26use crate::{DataFusionError, Result, ScalarValue};
27use arrow::array::{
28    cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29    OffsetSizeTrait,
30};
31use arrow::buffer::OffsetBuffer;
32use arrow::compute::{partition, SortColumn, SortOptions};
33use arrow::datatypes::{DataType, Field, SchemaRef};
34use sqlparser::ast::Ident;
35use sqlparser::dialect::GenericDialect;
36use sqlparser::parser::Parser;
37use std::borrow::{Borrow, Cow};
38use std::cmp::{min, Ordering};
39use std::collections::HashSet;
40use std::num::NonZero;
41use std::ops::Range;
42use std::sync::Arc;
43use std::thread::available_parallelism;
44
45/// Applies an optional projection to a [`SchemaRef`], returning the
46/// projected schema
47///
48/// Example:
49/// ```
50/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
51/// use datafusion_common::project_schema;
52///
53/// // Schema with columns 'a', 'b', and 'c'
54/// let schema = SchemaRef::new(Schema::new(vec![
55///   Field::new("a", DataType::Int32, true),
56///   Field::new("b", DataType::Int64, true),
57///   Field::new("c", DataType::Utf8, true),
58/// ]));
59///
60/// // Pick columns 'c' and 'b'
61/// let projection = Some(vec![2,1]);
62/// let projected_schema = project_schema(
63///    &schema,
64///    projection.as_ref()
65///  ).unwrap();
66///
67/// let expected_schema = SchemaRef::new(Schema::new(vec![
68///   Field::new("c", DataType::Utf8, true),
69///   Field::new("b", DataType::Int64, true),
70/// ]));
71///
72/// assert_eq!(projected_schema, expected_schema);
73/// ```
74pub fn project_schema(
75    schema: &SchemaRef,
76    projection: Option<&Vec<usize>>,
77) -> Result<SchemaRef> {
78    let schema = match projection {
79        Some(columns) => Arc::new(schema.project(columns)?),
80        None => Arc::clone(schema),
81    };
82    Ok(schema)
83}
84
85/// Extracts a row at the specified index from a set of columns and stores it in the provided buffer.
86pub fn extract_row_at_idx_to_buf(
87    columns: &[ArrayRef],
88    idx: usize,
89    buf: &mut Vec<ScalarValue>,
90) -> Result<()> {
91    buf.clear();
92
93    let iter = columns
94        .iter()
95        .map(|arr| ScalarValue::try_from_array(arr, idx));
96    for v in iter.into_iter() {
97        buf.push(v?);
98    }
99
100    Ok(())
101}
102/// Given column vectors, returns row at `idx`.
103pub fn get_row_at_idx(columns: &[ArrayRef], idx: usize) -> Result<Vec<ScalarValue>> {
104    columns
105        .iter()
106        .map(|arr| ScalarValue::try_from_array(arr, idx))
107        .collect()
108}
109
110/// This function compares two tuples depending on the given sort options.
111pub fn compare_rows(
112    x: &[ScalarValue],
113    y: &[ScalarValue],
114    sort_options: &[SortOptions],
115) -> Result<Ordering> {
116    let zip_it = x.iter().zip(y.iter()).zip(sort_options.iter());
117    // Preserving lexical ordering.
118    for ((lhs, rhs), sort_options) in zip_it {
119        // Consider all combinations of NULLS FIRST/LAST and ASC/DESC configurations.
120        let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
121            (true, false, false) | (false, true, true) => Ordering::Greater,
122            (true, false, true) | (false, true, false) => Ordering::Less,
123            (false, false, _) => if sort_options.descending {
124                rhs.partial_cmp(lhs)
125            } else {
126                lhs.partial_cmp(rhs)
127            }
128            .ok_or_else(|| {
129                _internal_datafusion_err!("Column array shouldn't be empty")
130            })?,
131            (true, true, _) => continue,
132        };
133        if result != Ordering::Equal {
134            return Ok(result);
135        }
136    }
137    Ok(Ordering::Equal)
138}
139
140/// This function searches for a tuple of given values (`target`) among the given
141/// rows (`item_columns`) using the bisection algorithm. It assumes that `item_columns`
142/// is sorted according to `sort_options` and returns the insertion index of `target`.
143/// Template argument `SIDE` being `true`/`false` means left/right insertion.
144pub fn bisect<const SIDE: bool>(
145    item_columns: &[ArrayRef],
146    target: &[ScalarValue],
147    sort_options: &[SortOptions],
148) -> Result<usize> {
149    let low: usize = 0;
150    let high: usize = item_columns
151        .first()
152        .ok_or_else(|| {
153            DataFusionError::Internal("Column array shouldn't be empty".to_string())
154        })?
155        .len();
156    let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
157        let cmp = compare_rows(current, target, sort_options)?;
158        Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
159    };
160    find_bisect_point(item_columns, target, compare_fn, low, high)
161}
162
163/// This function searches for a tuple of given values (`target`) among a slice of
164/// the given rows (`item_columns`) using the bisection algorithm. The slice starts
165/// at the index `low` and ends at the index `high`. The boolean-valued function
166/// `compare_fn` specifies whether we bisect on the left (by returning `false`),
167/// or on the right (by returning `true`) when we compare the target value with
168/// the current value as we iteratively bisect the input.
169pub fn find_bisect_point<F>(
170    item_columns: &[ArrayRef],
171    target: &[ScalarValue],
172    compare_fn: F,
173    mut low: usize,
174    mut high: usize,
175) -> Result<usize>
176where
177    F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
178{
179    while low < high {
180        let mid = ((high - low) / 2) + low;
181        let val = get_row_at_idx(item_columns, mid)?;
182        if compare_fn(&val, target)? {
183            low = mid + 1;
184        } else {
185            high = mid;
186        }
187    }
188    Ok(low)
189}
190
191/// This function searches for a tuple of given values (`target`) among the given
192/// rows (`item_columns`) via a linear scan. It assumes that `item_columns` is sorted
193/// according to `sort_options` and returns the insertion index of `target`.
194/// Template argument `SIDE` being `true`/`false` means left/right insertion.
195pub fn linear_search<const SIDE: bool>(
196    item_columns: &[ArrayRef],
197    target: &[ScalarValue],
198    sort_options: &[SortOptions],
199) -> Result<usize> {
200    let low: usize = 0;
201    let high: usize = item_columns
202        .first()
203        .ok_or_else(|| {
204            DataFusionError::Internal("Column array shouldn't be empty".to_string())
205        })?
206        .len();
207    let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
208        let cmp = compare_rows(current, target, sort_options)?;
209        Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
210    };
211    search_in_slice(item_columns, target, compare_fn, low, high)
212}
213
214/// This function searches for a tuple of given values (`target`) among a slice of
215/// the given rows (`item_columns`) via a linear scan. The slice starts at the index
216/// `low` and ends at the index `high`. The boolean-valued function `compare_fn`
217/// specifies the stopping criterion.
218pub fn search_in_slice<F>(
219    item_columns: &[ArrayRef],
220    target: &[ScalarValue],
221    compare_fn: F,
222    mut low: usize,
223    high: usize,
224) -> Result<usize>
225where
226    F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
227{
228    while low < high {
229        let val = get_row_at_idx(item_columns, low)?;
230        if !compare_fn(&val, target)? {
231            break;
232        }
233        low += 1;
234    }
235    Ok(low)
236}
237
238/// Given a list of 0 or more already sorted columns, finds the
239/// partition ranges that would partition equally across columns.
240///
241/// See [`partition`] for more details.
242pub fn evaluate_partition_ranges(
243    num_rows: usize,
244    partition_columns: &[SortColumn],
245) -> Result<Vec<Range<usize>>> {
246    Ok(if partition_columns.is_empty() {
247        vec![Range {
248            start: 0,
249            end: num_rows,
250        }]
251    } else {
252        let cols: Vec<_> = partition_columns
253            .iter()
254            .map(|x| Arc::clone(&x.values))
255            .collect();
256        partition(&cols)?.ranges()
257    })
258}
259
260/// Wraps identifier string in double quotes, escaping any double quotes in
261/// the identifier by replacing it with two double quotes
262///
263/// e.g. identifier `tab.le"name` becomes `"tab.le""name"`
264pub fn quote_identifier(s: &str) -> Cow<str> {
265    if needs_quotes(s) {
266        Cow::Owned(format!("\"{}\"", s.replace('"', "\"\"")))
267    } else {
268        Cow::Borrowed(s)
269    }
270}
271
272/// returns true if this identifier needs quotes
273fn needs_quotes(s: &str) -> bool {
274    let mut chars = s.chars();
275
276    // first char can not be a number unless escaped
277    if let Some(first_char) = chars.next() {
278        if !(first_char.is_ascii_lowercase() || first_char == '_') {
279            return true;
280        }
281    }
282
283    !chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
284}
285
286pub(crate) fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {
287    let dialect = GenericDialect;
288    let mut parser = Parser::new(&dialect).try_with_sql(s)?;
289    let idents = parser.parse_multipart_identifier()?;
290    Ok(idents)
291}
292
293pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec<String> {
294    parse_identifiers(s)
295        .unwrap_or_default()
296        .into_iter()
297        .map(|id| match id.quote_style {
298            Some(_) => id.value,
299            None if ignore_case => id.value,
300            _ => id.value.to_ascii_lowercase(),
301        })
302        .collect::<Vec<_>>()
303}
304
305/// This function "takes" the elements at `indices` from the slice `items`.
306pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
307    items: &[T],
308    indices: impl IntoIterator<Item = I>,
309) -> Result<Vec<T>> {
310    indices
311        .into_iter()
312        .map(|idx| items.get(*idx.borrow()).cloned())
313        .collect::<Option<Vec<T>>>()
314        .ok_or_else(|| {
315            DataFusionError::Execution(
316                "Expects indices to be in the range of searched vector".to_string(),
317            )
318        })
319}
320
321/// This function finds the longest prefix of the form 0, 1, 2, ... within the
322/// collection `sequence`. Examples:
323/// - For 0, 1, 2, 4, 5; we would produce 3, meaning 0, 1, 2 is the longest satisfying
324///   prefix.
325/// - For 1, 2, 3, 4; we would produce 0, meaning there is no such prefix.
326pub fn longest_consecutive_prefix<T: Borrow<usize>>(
327    sequence: impl IntoIterator<Item = T>,
328) -> usize {
329    let mut count = 0;
330    for item in sequence {
331        if !count.eq(item.borrow()) {
332            break;
333        }
334        count += 1;
335    }
336    count
337}
338
339/// Creates single element [`ListArray`], [`LargeListArray`] and
340/// [`FixedSizeListArray`] from other arrays
341///
342/// For example this builder can convert `[1, 2, 3]` into `[[1, 2, 3]]`
343///
344/// # Example
345/// ```
346/// # use std::sync::Arc;
347/// # use arrow::array::{Array, ListArray};
348/// # use arrow::array::types::Int64Type;
349/// # use datafusion_common::utils::SingleRowListArrayBuilder;
350/// // Array is [1, 2, 3]
351/// let arr = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
352///       Some(vec![Some(1), Some(2), Some(3)]),
353/// ]);
354/// // Wrap as a list array: [[1, 2, 3]]
355/// let list_arr = SingleRowListArrayBuilder::new(Arc::new(arr)).build_list_array();
356/// assert_eq!(list_arr.len(), 1);
357/// ```
358#[derive(Debug, Clone)]
359pub struct SingleRowListArrayBuilder {
360    /// array to be wrapped
361    arr: ArrayRef,
362    /// Should the resulting array be nullable? Defaults to `true`.
363    nullable: bool,
364    /// Specify the field name for the resulting array. Defaults to value used in
365    /// [`Field::new_list_field`]
366    field_name: Option<String>,
367}
368
369impl SingleRowListArrayBuilder {
370    /// Create a new instance of [`SingleRowListArrayBuilder`]
371    pub fn new(arr: ArrayRef) -> Self {
372        Self {
373            arr,
374            nullable: true,
375            field_name: None,
376        }
377    }
378
379    /// Set the nullable flag
380    pub fn with_nullable(mut self, nullable: bool) -> Self {
381        self.nullable = nullable;
382        self
383    }
384
385    /// sets the field name for the resulting array
386    pub fn with_field_name(mut self, field_name: Option<String>) -> Self {
387        self.field_name = field_name;
388        self
389    }
390
391    /// Copies field name and nullable from the specified field
392    pub fn with_field(self, field: &Field) -> Self {
393        self.with_field_name(Some(field.name().to_owned()))
394            .with_nullable(field.is_nullable())
395    }
396
397    /// Build a single element [`ListArray`]
398    pub fn build_list_array(self) -> ListArray {
399        let (field, arr) = self.into_field_and_arr();
400        let offsets = OffsetBuffer::from_lengths([arr.len()]);
401        ListArray::new(field, offsets, arr, None)
402    }
403
404    /// Build a single element [`ListArray`] and wrap as [`ScalarValue::List`]
405    pub fn build_list_scalar(self) -> ScalarValue {
406        ScalarValue::List(Arc::new(self.build_list_array()))
407    }
408
409    /// Build a single element [`LargeListArray`]
410    pub fn build_large_list_array(self) -> LargeListArray {
411        let (field, arr) = self.into_field_and_arr();
412        let offsets = OffsetBuffer::from_lengths([arr.len()]);
413        LargeListArray::new(field, offsets, arr, None)
414    }
415
416    /// Build a single element [`LargeListArray`] and wrap as [`ScalarValue::LargeList`]
417    pub fn build_large_list_scalar(self) -> ScalarValue {
418        ScalarValue::LargeList(Arc::new(self.build_large_list_array()))
419    }
420
421    /// Build a single element [`FixedSizeListArray`]
422    pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray {
423        let (field, arr) = self.into_field_and_arr();
424        FixedSizeListArray::new(field, list_size as i32, arr, None)
425    }
426
427    /// Build a single element [`FixedSizeListArray`] and wrap as [`ScalarValue::FixedSizeList`]
428    pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue {
429        ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size)))
430    }
431
432    /// Helper function: convert this builder into a tuple of field and array
433    fn into_field_and_arr(self) -> (Arc<Field>, ArrayRef) {
434        let Self {
435            arr,
436            nullable,
437            field_name,
438        } = self;
439        let data_type = arr.data_type().to_owned();
440        let field = match field_name {
441            Some(name) => Field::new(name, data_type, nullable),
442            None => Field::new_list_field(data_type, nullable),
443        };
444        (Arc::new(field), arr)
445    }
446}
447
448/// Wrap an array into a single element `ListArray`.
449/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
450/// The field in the list array is nullable.
451#[deprecated(
452    since = "44.0.0",
453    note = "please use `SingleRowListArrayBuilder` instead"
454)]
455pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray {
456    SingleRowListArrayBuilder::new(arr)
457        .with_nullable(true)
458        .build_list_array()
459}
460
461/// Wrap an array into a single element `ListArray`.
462/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
463#[deprecated(
464    since = "44.0.0",
465    note = "please use `SingleRowListArrayBuilder` instead"
466)]
467pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
468    SingleRowListArrayBuilder::new(arr)
469        .with_nullable(nullable)
470        .build_list_array()
471}
472
473#[deprecated(
474    since = "44.0.0",
475    note = "please use `SingleRowListArrayBuilder` instead"
476)]
477pub fn array_into_list_array_with_field_name(
478    arr: ArrayRef,
479    nullable: bool,
480    field_name: &str,
481) -> ListArray {
482    SingleRowListArrayBuilder::new(arr)
483        .with_nullable(nullable)
484        .with_field_name(Some(field_name.to_string()))
485        .build_list_array()
486}
487
488/// Wrap an array into a single element `LargeListArray`.
489/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
490#[deprecated(
491    since = "44.0.0",
492    note = "please use `SingleRowListArrayBuilder` instead"
493)]
494pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
495    SingleRowListArrayBuilder::new(arr).build_large_list_array()
496}
497
498#[deprecated(
499    since = "44.0.0",
500    note = "please use `SingleRowListArrayBuilder` instead"
501)]
502pub fn array_into_large_list_array_with_field_name(
503    arr: ArrayRef,
504    field_name: &str,
505) -> LargeListArray {
506    SingleRowListArrayBuilder::new(arr)
507        .with_field_name(Some(field_name.to_string()))
508        .build_large_list_array()
509}
510
511#[deprecated(
512    since = "44.0.0",
513    note = "please use `SingleRowListArrayBuilder` instead"
514)]
515pub fn array_into_fixed_size_list_array(
516    arr: ArrayRef,
517    list_size: usize,
518) -> FixedSizeListArray {
519    SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size)
520}
521
522#[deprecated(
523    since = "44.0.0",
524    note = "please use `SingleRowListArrayBuilder` instead"
525)]
526pub fn array_into_fixed_size_list_array_with_field_name(
527    arr: ArrayRef,
528    list_size: usize,
529    field_name: &str,
530) -> FixedSizeListArray {
531    SingleRowListArrayBuilder::new(arr)
532        .with_field_name(Some(field_name.to_string()))
533        .build_fixed_size_list_array(list_size)
534}
535
536/// Wrap arrays into a single element `ListArray`.
537///
538/// Example:
539/// ```
540/// use arrow::array::{Int32Array, ListArray, ArrayRef};
541/// use arrow::datatypes::{Int32Type, Field};
542/// use std::sync::Arc;
543///
544/// let arr1 = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
545/// let arr2 = Arc::new(Int32Array::from(vec![4, 5, 6])) as ArrayRef;
546///
547/// let list_arr = datafusion_common::utils::arrays_into_list_array([arr1, arr2]).unwrap();
548///
549/// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
550///    vec![
551///     Some(vec![Some(1), Some(2), Some(3)]),
552///     Some(vec![Some(4), Some(5), Some(6)]),
553///    ]
554/// );
555///
556/// assert_eq!(list_arr, expected);
557pub fn arrays_into_list_array(
558    arr: impl IntoIterator<Item = ArrayRef>,
559) -> Result<ListArray> {
560    let arr = arr.into_iter().collect::<Vec<_>>();
561    if arr.is_empty() {
562        return _internal_err!("Cannot wrap empty array into list array");
563    }
564
565    let lens = arr.iter().map(|x| x.len()).collect::<Vec<_>>();
566    // Assume data type is consistent
567    let data_type = arr[0].data_type().to_owned();
568    let values = arr.iter().map(|x| x.as_ref()).collect::<Vec<_>>();
569    Ok(ListArray::new(
570        Arc::new(Field::new_list_field(data_type, true)),
571        OffsetBuffer::from_lengths(lens),
572        arrow::compute::concat(values.as_slice())?,
573        None,
574    ))
575}
576
577/// Helper function to convert a ListArray into a vector of ArrayRefs.
578pub fn list_to_arrays<O: OffsetSizeTrait>(a: &ArrayRef) -> Vec<ArrayRef> {
579    a.as_list::<O>().iter().flatten().collect::<Vec<_>>()
580}
581
582/// Helper function to convert a FixedSizeListArray into a vector of ArrayRefs.
583pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec<ArrayRef> {
584    a.as_fixed_size_list().iter().flatten().collect::<Vec<_>>()
585}
586
587/// Get the base type of a data type.
588///
589/// Example
590/// ```
591/// use arrow::datatypes::{DataType, Field};
592/// use datafusion_common::utils::base_type;
593/// use std::sync::Arc;
594///
595/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
596/// assert_eq!(base_type(&data_type), DataType::Int32);
597///
598/// let data_type = DataType::Int32;
599/// assert_eq!(base_type(&data_type), DataType::Int32);
600/// ```
601pub fn base_type(data_type: &DataType) -> DataType {
602    match data_type {
603        DataType::List(field)
604        | DataType::LargeList(field)
605        | DataType::FixedSizeList(field, _) => base_type(field.data_type()),
606        _ => data_type.to_owned(),
607    }
608}
609
610/// Information about how to coerce lists.
611#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
612pub enum ListCoercion {
613    /// [`DataType::FixedSizeList`] should be coerced to [`DataType::List`].
614    FixedSizedListToList,
615}
616
617/// A helper function to coerce base type in List.
618///
619/// Example
620/// ```
621/// use arrow::datatypes::{DataType, Field};
622/// use datafusion_common::utils::coerced_type_with_base_type_only;
623/// use std::sync::Arc;
624///
625/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
626/// let base_type = DataType::Float64;
627/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type, None);
628/// assert_eq!(coerced_type, DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))));
629pub fn coerced_type_with_base_type_only(
630    data_type: &DataType,
631    base_type: &DataType,
632    array_coercion: Option<&ListCoercion>,
633) -> DataType {
634    match (data_type, array_coercion) {
635        (DataType::List(field), _)
636        | (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
637        {
638            let field_type = coerced_type_with_base_type_only(
639                field.data_type(),
640                base_type,
641                array_coercion,
642            );
643
644            DataType::List(Arc::new(Field::new(
645                field.name(),
646                field_type,
647                field.is_nullable(),
648            )))
649        }
650        (DataType::FixedSizeList(field, len), _) => {
651            let field_type = coerced_type_with_base_type_only(
652                field.data_type(),
653                base_type,
654                array_coercion,
655            );
656
657            DataType::FixedSizeList(
658                Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
659                *len,
660            )
661        }
662        (DataType::LargeList(field), _) => {
663            let field_type = coerced_type_with_base_type_only(
664                field.data_type(),
665                base_type,
666                array_coercion,
667            );
668
669            DataType::LargeList(Arc::new(Field::new(
670                field.name(),
671                field_type,
672                field.is_nullable(),
673            )))
674        }
675
676        _ => base_type.clone(),
677    }
678}
679
680/// Recursively coerce and `FixedSizeList` elements to `List`
681pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
682    match data_type {
683        DataType::List(field) | DataType::FixedSizeList(field, _) => {
684            let field_type = coerced_fixed_size_list_to_list(field.data_type());
685
686            DataType::List(Arc::new(Field::new(
687                field.name(),
688                field_type,
689                field.is_nullable(),
690            )))
691        }
692        DataType::LargeList(field) => {
693            let field_type = coerced_fixed_size_list_to_list(field.data_type());
694
695            DataType::LargeList(Arc::new(Field::new(
696                field.name(),
697                field_type,
698                field.is_nullable(),
699            )))
700        }
701
702        _ => data_type.clone(),
703    }
704}
705
706/// Compute the number of dimensions in a list data type.
707pub fn list_ndims(data_type: &DataType) -> u64 {
708    match data_type {
709        DataType::List(field)
710        | DataType::LargeList(field)
711        | DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
712        _ => 0,
713    }
714}
715
716/// Adopted from strsim-rs for string similarity metrics
717pub mod datafusion_strsim {
718    // Source: https://github.com/dguo/strsim-rs/blob/master/src/lib.rs
719    // License: https://github.com/dguo/strsim-rs/blob/master/LICENSE
720    use std::cmp::min;
721    use std::str::Chars;
722
723    struct StringWrapper<'a>(&'a str);
724
725    impl<'b> IntoIterator for &StringWrapper<'b> {
726        type Item = char;
727        type IntoIter = Chars<'b>;
728
729        fn into_iter(self) -> Self::IntoIter {
730            self.0.chars()
731        }
732    }
733
734    /// Calculates the minimum number of insertions, deletions, and substitutions
735    /// required to change one sequence into the other.
736    fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(
737        a: &'a Iter1,
738        b: &'b Iter2,
739    ) -> usize
740    where
741        &'a Iter1: IntoIterator<Item = Elem1>,
742        &'b Iter2: IntoIterator<Item = Elem2>,
743        Elem1: PartialEq<Elem2>,
744    {
745        let b_len = b.into_iter().count();
746
747        if a.into_iter().next().is_none() {
748            return b_len;
749        }
750
751        let mut cache: Vec<usize> = (1..b_len + 1).collect();
752
753        let mut result = 0;
754
755        for (i, a_elem) in a.into_iter().enumerate() {
756            result = i + 1;
757            let mut distance_b = i;
758
759            for (j, b_elem) in b.into_iter().enumerate() {
760                let cost = if a_elem == b_elem { 0usize } else { 1usize };
761                let distance_a = distance_b + cost;
762                distance_b = cache[j];
763                result = min(result + 1, min(distance_a, distance_b + 1));
764                cache[j] = result;
765            }
766        }
767
768        result
769    }
770
771    /// Calculates the minimum number of insertions, deletions, and substitutions
772    /// required to change one string into the other.
773    ///
774    /// ```
775    /// use datafusion_common::utils::datafusion_strsim::levenshtein;
776    ///
777    /// assert_eq!(3, levenshtein("kitten", "sitting"));
778    /// ```
779    pub fn levenshtein(a: &str, b: &str) -> usize {
780        generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
781    }
782
783    /// Calculates the normalized Levenshtein distance between two strings.
784    /// The normalized distance is a value between 0.0 and 1.0, where 1.0 indicates
785    /// that the strings are identical and 0.0 indicates no similarity.
786    ///
787    /// ```
788    /// use datafusion_common::utils::datafusion_strsim::normalized_levenshtein;
789    ///
790    /// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
791    ///
792    /// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
793    ///
794    /// assert!((normalized_levenshtein("kitten", "sitten") - 0.833).abs() < 0.001);
795    /// ```
796    pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
797        if a.is_empty() && b.is_empty() {
798            return 1.0;
799        }
800        1.0 - (levenshtein(a, b) as f64)
801            / (a.chars().count().max(b.chars().count()) as f64)
802    }
803}
804
805/// Merges collections `first` and `second`, removes duplicates and sorts the
806/// result, returning it as a [`Vec`].
807pub fn merge_and_order_indices<T: Borrow<usize>, S: Borrow<usize>>(
808    first: impl IntoIterator<Item = T>,
809    second: impl IntoIterator<Item = S>,
810) -> Vec<usize> {
811    let mut result: Vec<_> = first
812        .into_iter()
813        .map(|e| *e.borrow())
814        .chain(second.into_iter().map(|e| *e.borrow()))
815        .collect::<HashSet<_>>()
816        .into_iter()
817        .collect();
818    result.sort();
819    result
820}
821
822/// Calculates the set difference between sequences `first` and `second`,
823/// returning the result as a [`Vec`]. Preserves the ordering of `first`.
824pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
825    first: impl IntoIterator<Item = T>,
826    second: impl IntoIterator<Item = S>,
827) -> Vec<usize> {
828    let set: HashSet<_> = second.into_iter().map(|e| *e.borrow()).collect();
829    first
830        .into_iter()
831        .map(|e| *e.borrow())
832        .filter(|e| !set.contains(e))
833        .collect()
834}
835
836/// Checks whether the given index sequence is monotonically non-decreasing.
837#[deprecated(since = "45.0.0", note = "Use std::Iterator::is_sorted instead")]
838pub fn is_sorted<T: Borrow<usize>>(sequence: impl IntoIterator<Item = T>) -> bool {
839    // TODO: Remove this function when `is_sorted` graduates from Rust nightly.
840    let mut previous = 0;
841    for item in sequence.into_iter() {
842        let current = *item.borrow();
843        if current < previous {
844            return false;
845        }
846        previous = current;
847    }
848    true
849}
850
851/// Find indices of each element in `targets` inside `items`. If one of the
852/// elements is absent in `items`, returns an error.
853pub fn find_indices<T: PartialEq, S: Borrow<T>>(
854    items: &[T],
855    targets: impl IntoIterator<Item = S>,
856) -> Result<Vec<usize>> {
857    targets
858        .into_iter()
859        .map(|target| items.iter().position(|e| target.borrow().eq(e)))
860        .collect::<Option<_>>()
861        .ok_or_else(|| DataFusionError::Execution("Target not found".to_string()))
862}
863
864/// Transposes the given vector of vectors.
865pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> {
866    match original.as_slice() {
867        [] => vec![],
868        [first, ..] => {
869            let mut result = (0..first.len()).map(|_| vec![]).collect::<Vec<_>>();
870            for row in original {
871                for (item, transposed_row) in row.into_iter().zip(&mut result) {
872                    transposed_row.push(item);
873                }
874            }
875            result
876        }
877    }
878}
879
880/// Computes the `skip` and `fetch` parameters of a single limit that would be
881/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters.
882///
883/// There are multiple cases to consider:
884///
885/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`).
886///
887/// ```text
888///   Before merging:
889///                     |........skip........|---fetch-->|     Parent limit
890///    |...child_skip...|---child_fetch-->|                    Child limit
891/// ```
892///
893///   After merging:
894/// ```text
895///    |.........(child_skip + skip).........|
896/// ```
897///
898/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`).
899///
900///   Before merging:
901/// ```text
902///                     |...skip...|------------fetch------------>|   Parent limit
903///    |...child_skip...|-------------child_fetch------------>|       Child limit
904/// ```
905///
906///   After merging:
907/// ```text
908///    |....(child_skip + skip)....|---(child_fetch - skip)-->|
909/// ```
910///
911///  # Case 2: Parent is within child's range (`skip + fetch < child_fetch`).
912///
913///   Before merging:
914/// ```text
915///                     |...skip...|---fetch-->|                   Parent limit
916///    |...child_skip...|-------------child_fetch------------>|    Child limit
917/// ```
918///
919///   After merging:
920/// ```text
921///    |....(child_skip + skip)....|---fetch-->|
922/// ```
923pub fn combine_limit(
924    parent_skip: usize,
925    parent_fetch: Option<usize>,
926    child_skip: usize,
927    child_fetch: Option<usize>,
928) -> (usize, Option<usize>) {
929    let combined_skip = child_skip.saturating_add(parent_skip);
930
931    let combined_fetch = match (parent_fetch, child_fetch) {
932        (Some(parent_fetch), Some(child_fetch)) => {
933            Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip)))
934        }
935        (Some(parent_fetch), None) => Some(parent_fetch),
936        (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
937        (None, None) => None,
938    };
939
940    (combined_skip, combined_fetch)
941}
942
943/// Returns the estimated number of threads available for parallel execution.
944///
945/// This is a wrapper around `std::thread::available_parallelism`, providing a default value
946/// of `1` if the system's parallelism cannot be determined.
947pub fn get_available_parallelism() -> usize {
948    available_parallelism()
949        .unwrap_or(NonZero::new(1).expect("literal value `1` shouldn't be zero"))
950        .get()
951}
952
953/// Converts a collection of function arguments into an fixed-size array of length N
954/// producing a reasonable error message in case of unexpected number of arguments.
955///
956/// # Example
957/// ```
958/// # use datafusion_common::Result;
959/// # use datafusion_common::utils::take_function_args;
960/// # use datafusion_common::ScalarValue;
961/// fn my_function(args: &[ScalarValue]) -> Result<()> {
962///   // function expects 2 args, so create a 2-element array
963///   let [arg1, arg2] = take_function_args("my_function", args)?;
964///   // ... do stuff..
965///   Ok(())
966/// }
967///
968/// // Calling the function with 1 argument produces an error:
969/// let args = vec![ScalarValue::Int32(Some(10))];
970/// let err = my_function(&args).unwrap_err();
971/// assert_eq!(err.to_string(), "Execution error: my_function function requires 2 arguments, got 1");
972/// // Calling the function with 2 arguments works great
973/// let args = vec![ScalarValue::Int32(Some(10)), ScalarValue::Int32(Some(20))];
974/// my_function(&args).unwrap();
975/// ```
976pub fn take_function_args<const N: usize, T>(
977    function_name: &str,
978    args: impl IntoIterator<Item = T>,
979) -> Result<[T; N]> {
980    let args = args.into_iter().collect::<Vec<_>>();
981    args.try_into().map_err(|v: Vec<T>| {
982        _exec_datafusion_err!(
983            "{} function requires {} {}, got {}",
984            function_name,
985            N,
986            if N == 1 { "argument" } else { "arguments" },
987            v.len()
988        )
989    })
990}
991
992#[cfg(test)]
993mod tests {
994    use super::*;
995    use crate::ScalarValue::Null;
996    use arrow::array::Float64Array;
997    use sqlparser::tokenizer::Span;
998
999    #[test]
1000    fn test_bisect_linear_left_and_right() -> Result<()> {
1001        let arrays: Vec<ArrayRef> = vec![
1002            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.])),
1003            Arc::new(Float64Array::from(vec![2.0, 3.0, 3.0, 4.0, 5.0])),
1004            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 10., 11.0])),
1005            Arc::new(Float64Array::from(vec![15.0, 13.0, 8.0, 5., 0.0])),
1006        ];
1007        let search_tuple: Vec<ScalarValue> = vec![
1008            ScalarValue::Float64(Some(8.0)),
1009            ScalarValue::Float64(Some(3.0)),
1010            ScalarValue::Float64(Some(8.0)),
1011            ScalarValue::Float64(Some(8.0)),
1012        ];
1013        let ords = [
1014            SortOptions {
1015                descending: false,
1016                nulls_first: true,
1017            },
1018            SortOptions {
1019                descending: false,
1020                nulls_first: true,
1021            },
1022            SortOptions {
1023                descending: false,
1024                nulls_first: true,
1025            },
1026            SortOptions {
1027                descending: true,
1028                nulls_first: true,
1029            },
1030        ];
1031        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1032        assert_eq!(res, 2);
1033        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1034        assert_eq!(res, 3);
1035        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1036        assert_eq!(res, 2);
1037        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1038        assert_eq!(res, 3);
1039        Ok(())
1040    }
1041
1042    #[test]
1043    fn vector_ord() {
1044        assert!(vec![1, 0, 0, 0, 0, 0, 0, 1] < vec![1, 0, 0, 0, 0, 0, 0, 2]);
1045        assert!(vec![1, 0, 0, 0, 0, 0, 1, 1] > vec![1, 0, 0, 0, 0, 0, 0, 2]);
1046        assert!(
1047            vec![
1048                ScalarValue::Int32(Some(2)),
1049                Null,
1050                ScalarValue::Int32(Some(0)),
1051            ] < vec![
1052                ScalarValue::Int32(Some(2)),
1053                Null,
1054                ScalarValue::Int32(Some(1)),
1055            ]
1056        );
1057        assert!(
1058            vec![
1059                ScalarValue::Int32(Some(2)),
1060                ScalarValue::Int32(None),
1061                ScalarValue::Int32(Some(0)),
1062            ] < vec![
1063                ScalarValue::Int32(Some(2)),
1064                ScalarValue::Int32(None),
1065                ScalarValue::Int32(Some(1)),
1066            ]
1067        );
1068    }
1069
1070    #[test]
1071    fn ord_same_type() {
1072        assert!((ScalarValue::Int32(Some(2)) < ScalarValue::Int32(Some(3))));
1073    }
1074
1075    #[test]
1076    fn test_bisect_linear_left_and_right_diff_sort() -> Result<()> {
1077        // Descending, left
1078        let arrays: Vec<ArrayRef> =
1079            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1080        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1081        let ords = [SortOptions {
1082            descending: true,
1083            nulls_first: true,
1084        }];
1085        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1086        assert_eq!(res, 0);
1087        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1088        assert_eq!(res, 0);
1089
1090        // Descending, right
1091        let arrays: Vec<ArrayRef> =
1092            vec![Arc::new(Float64Array::from(vec![4.0, 3.0, 2.0, 1.0, 0.0]))];
1093        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(4.0))];
1094        let ords = [SortOptions {
1095            descending: true,
1096            nulls_first: true,
1097        }];
1098        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1099        assert_eq!(res, 1);
1100        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1101        assert_eq!(res, 1);
1102
1103        // Ascending, left
1104        let arrays: Vec<ArrayRef> =
1105            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1106        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1107        let ords = [SortOptions {
1108            descending: false,
1109            nulls_first: true,
1110        }];
1111        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1112        assert_eq!(res, 1);
1113        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1114        assert_eq!(res, 1);
1115
1116        // Ascending, right
1117        let arrays: Vec<ArrayRef> =
1118            vec![Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 9., 10.]))];
1119        let search_tuple: Vec<ScalarValue> = vec![ScalarValue::Float64(Some(7.0))];
1120        let ords = [SortOptions {
1121            descending: false,
1122            nulls_first: true,
1123        }];
1124        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1125        assert_eq!(res, 2);
1126        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1127        assert_eq!(res, 2);
1128
1129        let arrays: Vec<ArrayRef> = vec![
1130            Arc::new(Float64Array::from(vec![5.0, 7.0, 8.0, 8.0, 9., 10.])),
1131            Arc::new(Float64Array::from(vec![10.0, 9.0, 8.0, 7.5, 7., 6.])),
1132        ];
1133        let search_tuple: Vec<ScalarValue> = vec![
1134            ScalarValue::Float64(Some(8.0)),
1135            ScalarValue::Float64(Some(8.0)),
1136        ];
1137        let ords = [
1138            SortOptions {
1139                descending: false,
1140                nulls_first: true,
1141            },
1142            SortOptions {
1143                descending: true,
1144                nulls_first: true,
1145            },
1146        ];
1147        let res = bisect::<false>(&arrays, &search_tuple, &ords)?;
1148        assert_eq!(res, 3);
1149        let res = linear_search::<false>(&arrays, &search_tuple, &ords)?;
1150        assert_eq!(res, 3);
1151
1152        let res = bisect::<true>(&arrays, &search_tuple, &ords)?;
1153        assert_eq!(res, 2);
1154        let res = linear_search::<true>(&arrays, &search_tuple, &ords)?;
1155        assert_eq!(res, 2);
1156        Ok(())
1157    }
1158
1159    #[test]
1160    fn test_evaluate_partition_ranges() -> Result<()> {
1161        let arrays: Vec<ArrayRef> = vec![
1162            Arc::new(Float64Array::from(vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0])),
1163            Arc::new(Float64Array::from(vec![4.0, 4.0, 3.0, 2.0, 1.0, 1.0])),
1164        ];
1165        let n_row = arrays[0].len();
1166        let options: Vec<SortOptions> = vec![
1167            SortOptions {
1168                descending: false,
1169                nulls_first: false,
1170            },
1171            SortOptions {
1172                descending: true,
1173                nulls_first: false,
1174            },
1175        ];
1176        let sort_columns = arrays
1177            .into_iter()
1178            .zip(options)
1179            .map(|(values, options)| SortColumn {
1180                values,
1181                options: Some(options),
1182            })
1183            .collect::<Vec<_>>();
1184        let ranges = evaluate_partition_ranges(n_row, &sort_columns)?;
1185        assert_eq!(ranges.len(), 4);
1186        assert_eq!(ranges[0], Range { start: 0, end: 2 });
1187        assert_eq!(ranges[1], Range { start: 2, end: 3 });
1188        assert_eq!(ranges[2], Range { start: 3, end: 4 });
1189        assert_eq!(ranges[3], Range { start: 4, end: 6 });
1190        Ok(())
1191    }
1192
1193    #[test]
1194    fn test_quote_identifier() -> Result<()> {
1195        let cases = vec![
1196            ("foo", r#"foo"#),
1197            ("_foo", r#"_foo"#),
1198            ("foo_bar", r#"foo_bar"#),
1199            ("foo-bar", r#""foo-bar""#),
1200            // name itself has a period, needs to be quoted
1201            ("foo.bar", r#""foo.bar""#),
1202            ("Foo", r#""Foo""#),
1203            ("Foo.Bar", r#""Foo.Bar""#),
1204            // name starting with a number needs to be quoted
1205            ("test1", r#"test1"#),
1206            ("1test", r#""1test""#),
1207        ];
1208
1209        for (identifier, quoted_identifier) in cases {
1210            println!("input: \n{identifier}\nquoted_identifier:\n{quoted_identifier}");
1211
1212            assert_eq!(quote_identifier(identifier), quoted_identifier);
1213
1214            // When parsing the quoted identifier, it should be a
1215            // a single identifier without normalization, and not in multiple parts
1216            let quote_style = if quoted_identifier.starts_with('"') {
1217                Some('"')
1218            } else {
1219                None
1220            };
1221
1222            let expected_parsed = vec![Ident {
1223                value: identifier.to_string(),
1224                quote_style,
1225                span: Span::empty(),
1226            }];
1227
1228            assert_eq!(
1229                parse_identifiers(quoted_identifier).unwrap(),
1230                expected_parsed
1231            );
1232        }
1233
1234        Ok(())
1235    }
1236
1237    #[test]
1238    fn test_get_at_indices() -> Result<()> {
1239        let in_vec = vec![1, 2, 3, 4, 5, 6, 7];
1240        assert_eq!(get_at_indices(&in_vec, [0, 2])?, vec![1, 3]);
1241        assert_eq!(get_at_indices(&in_vec, [4, 2])?, vec![5, 3]);
1242        // 7 is outside the range
1243        assert!(get_at_indices(&in_vec, [7]).is_err());
1244        Ok(())
1245    }
1246
1247    #[test]
1248    fn test_longest_consecutive_prefix() {
1249        assert_eq!(longest_consecutive_prefix([0, 3, 4]), 1);
1250        assert_eq!(longest_consecutive_prefix([0, 1, 3, 4]), 2);
1251        assert_eq!(longest_consecutive_prefix([0, 1, 2, 3, 4]), 5);
1252        assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0);
1253    }
1254
1255    #[test]
1256    fn test_merge_and_order_indices() {
1257        assert_eq!(
1258            merge_and_order_indices([0, 3, 4], [1, 3, 5]),
1259            vec![0, 1, 3, 4, 5]
1260        );
1261        // Result should be ordered, even if inputs are not
1262        assert_eq!(
1263            merge_and_order_indices([3, 0, 4], [5, 1, 3]),
1264            vec![0, 1, 3, 4, 5]
1265        );
1266    }
1267
1268    #[test]
1269    fn test_set_difference() {
1270        assert_eq!(set_difference([0, 3, 4], [1, 2]), vec![0, 3, 4]);
1271        assert_eq!(set_difference([0, 3, 4], [1, 2, 4]), vec![0, 3]);
1272        // return value should have same ordering with the in1
1273        assert_eq!(set_difference([3, 4, 0], [1, 2, 4]), vec![3, 0]);
1274        assert_eq!(set_difference([0, 3, 4], [4, 1, 2]), vec![0, 3]);
1275        assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
1276    }
1277
1278    #[test]
1279    #[expect(deprecated)]
1280    fn test_is_sorted() {
1281        assert!(is_sorted::<usize>([]));
1282        assert!(is_sorted([0]));
1283        assert!(is_sorted([0, 3, 4]));
1284        assert!(is_sorted([0, 1, 2]));
1285        assert!(is_sorted([0, 1, 4]));
1286        assert!(is_sorted([0usize; 0]));
1287        assert!(is_sorted([1, 2]));
1288        assert!(!is_sorted([3, 2]));
1289    }
1290
1291    #[test]
1292    fn test_find_indices() -> Result<()> {
1293        assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);
1294        assert_eq!(find_indices(&[0, 3, 4], [0, 4, 3])?, vec![0, 2, 1]);
1295        assert_eq!(find_indices(&[3, 0, 4], [0, 3])?, vec![1, 0]);
1296        assert!(find_indices(&[0, 3], [0, 3, 4]).is_err());
1297        assert!(find_indices(&[0, 3, 4], [0, 2]).is_err());
1298        Ok(())
1299    }
1300
1301    #[test]
1302    fn test_transpose() -> Result<()> {
1303        let in_data = vec![vec![1, 2, 3], vec![4, 5, 6]];
1304        let transposed = transpose(in_data);
1305        let expected = vec![vec![1, 4], vec![2, 5], vec![3, 6]];
1306        assert_eq!(expected, transposed);
1307        Ok(())
1308    }
1309}