datafusion_functions/
macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates public expression-builder functions for a list of scalar
/// functions.
///
/// Every entry has the form `(name, doc, args)` and expands to a single
/// `pub fn name(..) -> datafusion_expr::Expr` carrying `doc` as its
/// documentation. The generated function forwards its arguments to the value
/// returned by `super::name()`, so the macro has to be invoked inside a child
/// module (conventionally `expr_fn`) of the module that defines `name()`.
///
/// Only the wrapper functions are generated: the surrounding module and any
/// list of all functions of a package are written by the caller.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!((encode, "Return encode(arg)", args,));
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
///         super::encode().call(args)
///     }
/// }
/// ```
///
/// The `args` part of an entry selects the shape of the generated function:
/// - `name,` (a single identifier followed by a comma): one `Vec<Expr>`
///   parameter called `name`
/// - `a b c` (zero or more identifiers, must be without commas): one `Expr`
///   parameter per identifier
/// - `@config`: no parameters; the function is created by calling
///   `super::name(&ConfigOptions::default())`
///
/// Entries are separated by commas; a trailing comma after the last entry is
/// accepted.
#[macro_export]
macro_rules! export_functions {
    // entry point: list of `(name, doc, args)` tuples, optional trailing comma
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config)
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
78
/// Defines `pub fn $NAME()`, an accessor that hands out a shared `ScalarUDF`
/// built from the `$UDF` implementation.
///
/// The `ScalarUDF` is constructed with `<$UDF>::new()` on the first call and
/// kept in a function-local static, so the construction happens only once;
/// every call returns a clone of the same `Arc`.
///
/// $UDF: the type implementing the function (must provide `new()`)
/// $NAME: the name of the generated accessor function
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Lazily initialized on first access, shared by all later calls
            static SINGLETON: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                let udf = datafusion_expr::ScalarUDF::new_from_impl(<$UDF>::new());
                std::sync::Arc::new(udf)
            });
            std::sync::Arc::clone(std::sync::LazyLock::force(&SINGLETON))
        }
    };
}
101
/// Defines `pub fn $NAME(config)`, which builds a `ScalarUDF` from the `$UDF`
/// implementation using the supplied configuration.
///
/// Unlike `make_udf_function!`, nothing is cached: every call invokes
/// `<$UDF>::new_with_config(config)` and returns a freshly allocated `Arc`.
/// The configuration type is fixed to
/// `datafusion_common::config::ConfigOptions`.
///
/// $UDF: the type implementing the function (must provide `new_with_config`)
/// $NAME: the name of the generated function
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // `config` is already a reference, pass it through as is
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
117
/// Generates a placeholder module `$name` that is only compiled when the
/// cargo feature `$feature` is NOT enabled.
///
/// The placeholder exposes a `functions()` that logs (at debug level) that
/// the package is disabled and returns an empty list. Its rustdoc tells users
/// which feature flag to turn on, which is more helpful than a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
140
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// The macro expands to an expression that uses `?`, so it can only be used
/// inside a function whose error type can be built from the internal error.
///
/// $ARG: ArrayRef (anything providing `as_any()`)
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is
/// accepted, including generic ones such as `PrimitiveArray<Float64Type>`
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
159
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This is a shorthand for `downcast_named_arg!` that uses the source text of
/// the `$ARG` expression as the argument name, so a failed cast reports which
/// expression could not be converted.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
171
/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// The macro generates a private module `$NAME` containing the struct `$UDF`
/// and, via `make_udf_function!`, an accessor function `$NAME()`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the interval bounds calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF accepting exactly one Float64 or Float32 argument
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::uniform(
                            1,
                            vec![Float64, Float32],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        // Report a missing argument instead of panicking on the index
                        None => exec_err!(
                            "Function {} expects one argument but received none",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    // Report a missing argument instead of panicking on the index
                    let Some(array) = arrays.first() else {
                        return exec_err!(
                            "Function {} expects one argument but received none",
                            self.name()
                        );
                    };

                    let arr: ArrayRef = match array.data_type() {
                        DataType::Float64 => Arc::new(
                            array
                                .as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|x: f64| f64::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        DataType::Float32 => Arc::new(
                            array
                                .as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|x: f32| f32::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
285
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments of types Float32 or Float64,
/// applies a binary floating function to the argument, and returns a value of the same type.
/// Both arguments must have the same type.
///
/// The macro generates a private module `$NAME` containing the struct `$UDF`
/// and, via `make_udf_function!`, an accessor function `$NAME()`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF accepting either two Float32 or two Float64 arguments
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::one_of(
                            vec![
                                TypeSignature::Exact(vec![Float32, Float32]),
                                TypeSignature::Exact(vec![Float64, Float64]),
                            ],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // The result type follows the type of the first argument
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        // Report missing arguments instead of panicking on the index
                        None => exec_err!(
                            "Function {} expects two arguments but received none",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    // Report missing arguments instead of panicking on the index
                    let [y, x, ..] = arrays.as_slice() else {
                        return exec_err!(
                            "Function {} expects two arguments but received {}",
                            self.name(),
                            arrays.len()
                        );
                    };

                    // Match on both types so that a mismatched second argument
                    // yields an error rather than a panic inside `as_primitive`
                    let arr: ArrayRef = match (y.data_type(), x.data_type()) {
                        (DataType::Float64, DataType::Float64) => {
                            let result = arrow::compute::binary::<_, _, _, Float64Type>(
                                y.as_primitive::<Float64Type>(),
                                x.as_primitive::<Float64Type>(),
                                |y, x| f64::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        (DataType::Float32, DataType::Float32) => {
                            let result = arrow::compute::binary::<_, _, _, Float32Type>(
                                y.as_primitive::<Float32Type>(),
                                x.as_primitive::<Float32Type>(),
                                |y, x| f32::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        (DataType::Float64 | DataType::Float32, x_type) => {
                            return exec_err!(
                                "Mismatched argument types {:?} and {x_type:?} for function {}",
                                y.data_type(),
                                self.name()
                            )
                        }
                        (other, _) => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}