datafusion_functions_nested/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![doc(
19    html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20    html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_cfg))]
23// Make sure fast / cheap clones on Arc are explicit:
24// https://github.com/apache/datafusion/issues/11143
25#![deny(clippy::clone_on_ref_ptr)]
26#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
27// https://github.com/apache/datafusion/issues/18881
28#![deny(clippy::allow_attributes)]
29
30//! Nested type Functions for [DataFusion].
31//!
32//! This crate contains a collection of nested type functions implemented using the
33//! extension API.
34//!
35//! [DataFusion]: https://crates.io/crates/datafusion
36//!
37//! You can register the functions in this crate using the [`register_all`] function.
38
39#[macro_use]
40pub mod macros;
41
42pub mod array_has;
43pub mod cardinality;
44pub mod concat;
45pub mod dimension;
46pub mod distance;
47pub mod empty;
48pub mod except;
49pub mod expr_ext;
50pub mod extract;
51pub mod flatten;
52pub mod length;
53pub mod make_array;
54pub mod map;
55pub mod map_entries;
56pub mod map_extract;
57pub mod map_keys;
58pub mod map_values;
59pub mod min_max;
60pub mod planner;
61pub mod position;
62pub mod range;
63pub mod remove;
64pub mod repeat;
65pub mod replace;
66pub mod resize;
67pub mod reverse;
68pub mod set_ops;
69pub mod sort;
70pub mod string;
71pub mod utils;
72
73use datafusion_common::Result;
74use datafusion_execution::FunctionRegistry;
75use datafusion_expr::ScalarUDF;
76use log::debug;
77use std::sync::Arc;
78
79/// Fluent-style API for creating `Expr`s
80pub mod expr_fn {
81    pub use super::array_has::array_has;
82    pub use super::array_has::array_has_all;
83    pub use super::array_has::array_has_any;
84    pub use super::cardinality::cardinality;
85    pub use super::concat::array_append;
86    pub use super::concat::array_concat;
87    pub use super::concat::array_prepend;
88    pub use super::dimension::array_dims;
89    pub use super::dimension::array_ndims;
90    pub use super::distance::array_distance;
91    pub use super::empty::array_empty;
92    pub use super::except::array_except;
93    pub use super::extract::array_any_value;
94    pub use super::extract::array_element;
95    pub use super::extract::array_pop_back;
96    pub use super::extract::array_pop_front;
97    pub use super::extract::array_slice;
98    pub use super::flatten::flatten;
99    pub use super::length::array_length;
100    pub use super::make_array::make_array;
101    pub use super::map_entries::map_entries;
102    pub use super::map_extract::map_extract;
103    pub use super::map_keys::map_keys;
104    pub use super::map_values::map_values;
105    pub use super::min_max::array_max;
106    pub use super::min_max::array_min;
107    pub use super::position::array_position;
108    pub use super::position::array_positions;
109    pub use super::range::gen_series;
110    pub use super::range::range;
111    pub use super::remove::array_remove;
112    pub use super::remove::array_remove_all;
113    pub use super::remove::array_remove_n;
114    pub use super::repeat::array_repeat;
115    pub use super::replace::array_replace;
116    pub use super::replace::array_replace_all;
117    pub use super::replace::array_replace_n;
118    pub use super::resize::array_resize;
119    pub use super::reverse::array_reverse;
120    pub use super::set_ops::array_distinct;
121    pub use super::set_ops::array_intersect;
122    pub use super::set_ops::array_union;
123    pub use super::sort::array_sort;
124    pub use super::string::array_to_string;
125    pub use super::string::string_to_array;
126}
127
128/// Return all default nested type functions
129pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
130    vec![
131        string::array_to_string_udf(),
132        string::string_to_array_udf(),
133        range::range_udf(),
134        range::gen_series_udf(),
135        dimension::array_dims_udf(),
136        cardinality::cardinality_udf(),
137        dimension::array_ndims_udf(),
138        concat::array_append_udf(),
139        concat::array_prepend_udf(),
140        concat::array_concat_udf(),
141        except::array_except_udf(),
142        extract::array_element_udf(),
143        extract::array_pop_back_udf(),
144        extract::array_pop_front_udf(),
145        extract::array_slice_udf(),
146        extract::array_any_value_udf(),
147        make_array::make_array_udf(),
148        array_has::array_has_udf(),
149        array_has::array_has_all_udf(),
150        array_has::array_has_any_udf(),
151        empty::array_empty_udf(),
152        length::array_length_udf(),
153        distance::array_distance_udf(),
154        flatten::flatten_udf(),
155        min_max::array_max_udf(),
156        min_max::array_min_udf(),
157        sort::array_sort_udf(),
158        repeat::array_repeat_udf(),
159        resize::array_resize_udf(),
160        reverse::array_reverse_udf(),
161        set_ops::array_distinct_udf(),
162        set_ops::array_intersect_udf(),
163        set_ops::array_union_udf(),
164        position::array_position_udf(),
165        position::array_positions_udf(),
166        remove::array_remove_udf(),
167        remove::array_remove_all_udf(),
168        remove::array_remove_n_udf(),
169        replace::array_replace_n_udf(),
170        replace::array_replace_all_udf(),
171        replace::array_replace_udf(),
172        map::map_udf(),
173        map_entries::map_entries_udf(),
174        map_extract::map_extract_udf(),
175        map_keys::map_keys_udf(),
176        map_values::map_values_udf(),
177    ]
178}
179
180/// Registers all enabled packages with a [`FunctionRegistry`]
181pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
182    let functions: Vec<Arc<ScalarUDF>> = all_default_nested_functions();
183    functions.into_iter().try_for_each(|udf| {
184        let existing_udf = registry.register_udf(udf)?;
185        if let Some(existing_udf) = existing_udf {
186            debug!("Overwrite existing UDF: {}", existing_udf.name());
187        }
188        Ok(()) as Result<()>
189    })?;
190
191    Ok(())
192}
193
#[cfg(test)]
mod tests {
    use crate::all_default_nested_functions;
    use datafusion_common::Result;
    use std::collections::HashSet;

    /// Checks that no two default functions share a name or alias when
    /// compared case-insensitively.
    #[test]
    fn test_no_duplicate_name() -> Result<()> {
        let mut seen = HashSet::new();
        for udf in all_default_nested_functions() {
            // Visit the primary name first, then each alias, in declaration order.
            let primary = std::iter::once(udf.name().to_string());
            let aliases = udf.aliases().iter().map(|alias| alias.to_string());
            for candidate in primary.chain(aliases) {
                // `insert` returns `false` when the lowercased name is already present.
                let is_new = seen.insert(candidate.to_lowercase());
                assert!(is_new, "duplicate function name: {candidate}");
            }
        }
        Ok(())
    }
}