datafusion_functions_nested/
lib.rs1#![doc(
19 html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20 html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_cfg))]
23#![deny(clippy::clone_on_ref_ptr)]
26#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
27
28#[macro_use]
38pub mod macros;
39
40#[macro_use]
41pub mod macros_lambda;
42
43pub mod array_any_match;
44pub(crate) mod lambda_utils;
45
46pub mod array_compact;
47pub mod array_filter;
48pub mod array_has;
49pub mod array_normalize;
50pub mod array_transform;
51pub mod arrays_zip;
52pub mod cardinality;
53pub mod concat;
54pub mod cosine_distance;
55pub mod dimension;
56pub mod distance;
57pub mod empty;
58pub mod except;
59pub mod expr_ext;
60pub mod extract;
61pub mod flatten;
62pub mod inner_product;
63pub mod length;
64pub mod make_array;
65pub mod map;
66pub mod map_entries;
67pub mod map_extract;
68pub mod map_keys;
69pub mod map_values;
70pub mod min_max;
71pub mod planner;
72pub mod position;
73pub mod range;
74pub mod remove;
75pub mod repeat;
76pub mod replace;
77pub mod resize;
78pub mod reverse;
79pub mod set_ops;
80pub mod sort;
81pub mod string;
82pub mod utils;
83
84use datafusion_common::Result;
85use datafusion_execution::FunctionRegistry;
86use datafusion_expr::{HigherOrderUDF, ScalarUDF};
87use log::debug;
88use std::sync::Arc;
89
90pub mod expr_fn {
92 pub use super::array_any_match::array_any_match;
93 pub use super::array_compact::array_compact;
94 pub use super::array_filter::array_filter;
95 pub use super::array_has::array_has;
96 pub use super::array_has::array_has_all;
97 pub use super::array_has::array_has_any;
98 pub use super::array_normalize::array_normalize;
99 pub use super::array_transform::array_transform;
100 pub use super::arrays_zip::arrays_zip;
101 pub use super::cardinality::cardinality;
102 pub use super::concat::array_append;
103 pub use super::concat::array_concat;
104 pub use super::concat::array_prepend;
105 pub use super::cosine_distance::cosine_distance;
106 pub use super::dimension::array_dims;
107 pub use super::dimension::array_ndims;
108 pub use super::distance::array_distance;
109 pub use super::empty::array_empty;
110 pub use super::except::array_except;
111 pub use super::extract::array_any_value;
112 pub use super::extract::array_element;
113 pub use super::extract::array_pop_back;
114 pub use super::extract::array_pop_front;
115 pub use super::extract::array_slice;
116 pub use super::flatten::flatten;
117 pub use super::inner_product::inner_product;
118 pub use super::length::array_length;
119 pub use super::make_array::make_array;
120 pub use super::map_entries::map_entries;
121 pub use super::map_extract::map_extract;
122 pub use super::map_keys::map_keys;
123 pub use super::map_values::map_values;
124 pub use super::min_max::array_max;
125 pub use super::min_max::array_min;
126 pub use super::position::array_position;
127 pub use super::position::array_positions;
128 pub use super::range::gen_series;
129 pub use super::range::range;
130 pub use super::remove::array_remove;
131 pub use super::remove::array_remove_all;
132 pub use super::remove::array_remove_n;
133 pub use super::repeat::array_repeat;
134 pub use super::replace::array_replace;
135 pub use super::replace::array_replace_all;
136 pub use super::replace::array_replace_n;
137 pub use super::resize::array_resize;
138 pub use super::reverse::array_reverse;
139 pub use super::set_ops::array_distinct;
140 pub use super::set_ops::array_intersect;
141 pub use super::set_ops::array_union;
142 pub use super::sort::array_sort;
143 pub use super::string::array_to_string;
144 pub use super::string::string_to_array;
145}
146
147pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
149 vec![
150 array_compact::array_compact_udf(),
151 string::array_to_string_udf(),
152 string::string_to_array_udf(),
153 range::range_udf(),
154 range::gen_series_udf(),
155 dimension::array_dims_udf(),
156 cardinality::cardinality_udf(),
157 dimension::array_ndims_udf(),
158 concat::array_append_udf(),
159 concat::array_prepend_udf(),
160 concat::array_concat_udf(),
161 except::array_except_udf(),
162 extract::array_element_udf(),
163 extract::array_pop_back_udf(),
164 extract::array_pop_front_udf(),
165 extract::array_slice_udf(),
166 extract::array_any_value_udf(),
167 make_array::make_array_udf(),
168 array_has::array_has_udf(),
169 array_has::array_has_all_udf(),
170 array_has::array_has_any_udf(),
171 empty::array_empty_udf(),
172 length::array_length_udf(),
173 array_normalize::array_normalize_udf(),
174 cosine_distance::cosine_distance_udf(),
175 inner_product::inner_product_udf(),
176 distance::array_distance_udf(),
177 flatten::flatten_udf(),
178 min_max::array_max_udf(),
179 min_max::array_min_udf(),
180 sort::array_sort_udf(),
181 repeat::array_repeat_udf(),
182 resize::array_resize_udf(),
183 reverse::array_reverse_udf(),
184 set_ops::array_distinct_udf(),
185 set_ops::array_intersect_udf(),
186 set_ops::array_union_udf(),
187 arrays_zip::arrays_zip_udf(),
188 position::array_position_udf(),
189 position::array_positions_udf(),
190 remove::array_remove_udf(),
191 remove::array_remove_all_udf(),
192 remove::array_remove_n_udf(),
193 replace::array_replace_n_udf(),
194 replace::array_replace_all_udf(),
195 replace::array_replace_udf(),
196 map::map_udf(),
197 map_entries::map_entries_udf(),
198 map_extract::map_extract_udf(),
199 map_keys::map_keys_udf(),
200 map_values::map_values_udf(),
201 ]
202}
203
204pub fn all_default_higher_order_functions() -> Vec<Arc<HigherOrderUDF>> {
205 vec![
206 array_any_match::array_any_match_higher_order_function(),
207 array_filter::array_filter_higher_order_function(),
208 array_transform::array_transform_higher_order_function(),
209 ]
210}
211
212pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
214 let functions: Vec<Arc<ScalarUDF>> = all_default_nested_functions();
215 functions.into_iter().try_for_each(|udf| {
216 let existing_udf = registry.register_udf(udf)?;
217 if let Some(existing_udf) = existing_udf {
218 debug!("Overwrite existing UDF: {}", existing_udf.name());
219 }
220 Ok(()) as Result<()>
221 })?;
222
223 let functions: Vec<Arc<HigherOrderUDF>> = all_default_higher_order_functions();
224 functions.into_iter().try_for_each(|function| {
225 let existing_function = registry.register_higher_order_function(function)?;
226 if let Some(existing_function) = existing_function {
227 debug!(
228 "Overwrite existing higher-order function: {}",
229 existing_function.name()
230 );
231 }
232 Ok(()) as Result<()>
233 })?;
234
235 Ok(())
236}
237
#[cfg(test)]
mod tests {
    use crate::{all_default_higher_order_functions, all_default_nested_functions};
    use datafusion_common::Result;
    use std::collections::HashSet;

    /// Checks that no two default functions share a name or alias.
    ///
    /// Names and aliases of both the scalar and the higher-order functions
    /// are lower-cased and collected into one set, so the comparison is
    /// case-insensitive and spans both function kinds.
    #[test]
    fn test_no_duplicate_name() -> Result<()> {
        let scalar_udfs = all_default_nested_functions();
        let higher_order_udfs = all_default_higher_order_functions();

        let scalar_entries = scalar_udfs.iter().map(|f| (f.name(), f.aliases()));
        let higher_order_entries =
            higher_order_udfs.iter().map(|f| (f.name(), f.aliases()));

        let mut seen = HashSet::new();

        for (name, aliases) in scalar_entries.chain(higher_order_entries) {
            // `insert` returns false when the lower-cased name is already present.
            let is_new = seen.insert(name.to_lowercase());
            assert!(is_new, "duplicate function name: {name}");

            for alias in aliases {
                let is_new = seen.insert(alias.to_lowercase());
                assert!(is_new, "duplicate function name: {alias}");
            }
        }
        Ok(())
    }
}