datafusion_functions/
macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates convenience `Expr` constructors for a list of scalar functions.
///
/// The macro takes a comma-separated list of `(name, doc, args)` entries (a
/// trailing comma after the last entry is allowed). For every entry it emits one
/// `pub fn name(..) -> datafusion_expr::Expr`, documented with `doc`, that
/// forwards its arguments to `super::name().call(..)`.
///
/// Because the generated functions refer to `super::name`, the macro has to be
/// invoked inside a child module (conventionally `expr_fn`) of the module that
/// defines the `name()` accessors. The macro itself emits neither the
/// surrounding module nor a `functions()` list; those are written by the caller.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!((encode, "Return encode(arg)", args,));
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
///         super::encode().call(args)
///     }
/// }
/// ```
///
/// The `args` part of an entry selects the shape of the generated function:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
/// - Functions that require config (marked with `@config` prefix, followed by
///   either of the two forms above)
///
/// Note on configuration construction paths:
/// - The convenience wrappers generated for `@config` functions call the inner
///   constructor with `ConfigOptions::default()`. These wrappers are intended
///   primarily for programmatic `Expr` construction and convenience usage.
/// - When functions are registered in a session, DataFusion will call
///   `with_updated_config()` to create a `ScalarUDF` instance using the session's
///   actual `ConfigOptions`. This also happens when configuration changes at runtime
///   (e.g., via `SET` statements). In short: the macro uses the default config for
///   convenience constructors; the session config is applied when functions are
///   registered or when configuration is updated.
#[macro_export]
macro_rules! export_functions {
    // Entry point: a list of `(name, doc, args)` tuples with an optional
    // trailing comma. Each tuple is re-dispatched to one of the `single` arms.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config) and takes no arguments
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // function that requires config and takes a vector argument
    (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call($arg)
        }
    };

    // function that requires config and variadic arguments
    (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
107
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton. Optionally use a custom constructor
/// `$CTOR` which defaults to `$UDF::new()` if not specified.
///
/// The singleton is stored in a `LazyLock`, so `$CTOR` runs at most once (on the
/// first call to `$NAME()`); every call returns a clone of the same `Arc`.
///
/// Arguments:
/// - `$UDF`: the type implementing the UDF, passed to `ScalarUDF::new_from_impl`
/// - `$NAME`: the name of the generated accessor function
/// - `$CTOR`: (optional) a callable expression taking no arguments that builds `$UDF`
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident, $CTOR:expr) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Singleton instance of the function
            static INSTANCE: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    ($CTOR)(),
                ))
            });
            std::sync::Arc::clone(&INSTANCE)
        }
    };
    ($UDF:ty, $NAME:ident) => {
        // Use `$crate::` so the recursive call resolves even when the caller has
        // not imported this macro by name.
        $crate::make_udf_function!($UDF, $NAME, <$UDF>::new);
    };
}
133
/// Creates a function named `$NAME` that builds a `ScalarUDF` of the `$UDF`
/// function from a `datafusion_common::config::ConfigOptions` reference.
///
/// Unlike `make_udf_function!`, nothing is cached: every call to `$NAME(config)`
/// constructs a new instance through `<$UDF>::new_with_config(config)` and wraps
/// it in a fresh `Arc`.
///
/// Arguments:
/// - `$UDF`: the type implementing the UDF; it must provide `new_with_config`
/// - `$NAME`: the name of the generated constructor function
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // `config` is already a reference; pass it through without re-borrowing.
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
148
/// Emits a stub module named `$name` when the cargo feature `$feature` is
/// not enabled.
///
/// The stub exposes a `functions()` entry point that logs (at debug level) that
/// the package is disabled and returns no functions. The rationale for providing
/// stubs is to help users configure datafusion properly: the module and its doc
/// string name the feature flag to enable, instead of leaving only a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
171
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// The macro expands to an expression ending in `?`, so it can only be used in a
/// function whose error type can be built from the error produced by
/// `datafusion_common::internal_datafusion_err!`.
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is accepted,
/// including path-qualified or generic ones
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
190
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This forwards to `downcast_named_arg!`, using the source text of `$ARG` as the
/// argument name so that the error message identifies what failed to cast.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
200
/// Generates a scalar UDF that applies a unary floating-point function.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` (which implements `ScalarUDFImpl`) and a `$NAME()` accessor created
/// through `make_udf_function!`. The generated function takes one argument of
/// type Float64 or Float32, evaluates `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC`
/// on every element, and returns an array of the same float type.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the module name)
/// $UNARY_FUNC: the name of the `f64`/`f32` associated function to apply
/// $OUTPUT_ORDERING: callable computing the output ordering from the input properties
/// $EVALUATE_BOUNDS: callable computing the output interval from the input intervals
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{Result, exec_err};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF with a one-argument Float64/Float32 signature.
                pub fn new() -> Self {
                    let signature = Signature::uniform(
                        1,
                        vec![DataType::Float64, DataType::Float32],
                        Volatility::Immutable,
                    );
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input keeps its type; every other input type
                    // (possible values float64/null/int) is reported as Float64.
                    let output_type = if matches!(arg_types[0], DataType::Float32) {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    // Materialize every argument as an array, then dispatch on
                    // the element type of the first (and only) one.
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    let output = match input.data_type() {
                        DataType::Float64 => {
                            let values = input.as_primitive::<Float64Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        DataType::Float32 => {
                            let values = input.as_primitive::<Float32Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
314
/// Generates a scalar UDF that applies a binary floating-point function.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` (which implements `ScalarUDFImpl`) and a `$NAME()` accessor created
/// through `make_udf_function!`. The generated function takes two arguments that
/// are both Float32 or both Float64, evaluates `f32::$BINARY_FUNC` /
/// `f64::$BINARY_FUNC` on each pair of values, and returns the same float type.
/// Two scalar inputs produce a scalar result (null if either input is null);
/// any other combination is evaluated element-wise on arrays.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the module name)
/// $BINARY_FUNC: the name of the `f64`/`f32` associated function to apply
/// $OUTPUT_ORDERING: callable computing the output ordering from the input properties
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::utils::take_function_args;
            use datafusion_common::{Result, ScalarValue, internal_err};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF accepting `(Float32, Float32)` or `(Float64, Float64)`.
                pub fn new() -> Self {
                    let accepted = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    Self {
                        signature: Signature::one_of(accepted, Volatility::Immutable),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input keeps its type; every other input type
                    // (possible values float64/null/int) is reported as Float64.
                    let output_type = if matches!(arg_types[0], DataType::Float32) {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let ScalarFunctionArgs {
                        args: raw_args,
                        return_field,
                        ..
                    } = args;
                    let [lhs, rhs] = take_function_args(self.name(), raw_args)?;

                    // Fast path: both inputs are scalars, so the result is a scalar too.
                    if let (ColumnarValue::Scalar(left), ColumnarValue::Scalar(right)) =
                        (&lhs, &rhs)
                    {
                        return match (left, right) {
                            // A null on either side yields a null of the declared return type.
                            (l, r) if l.is_null() || r.is_null() => {
                                ColumnarValue::Scalar(ScalarValue::Null)
                                    .cast_to(return_field.data_type(), None)
                            }
                            (
                                ScalarValue::Float64(Some(a)),
                                ScalarValue::Float64(Some(b)),
                            ) => Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(
                                f64::$BINARY_FUNC(*a, *b),
                            )))),
                            (
                                ScalarValue::Float32(Some(a)),
                                ScalarValue::Float32(Some(b)),
                            ) => Ok(ColumnarValue::Scalar(ScalarValue::Float32(Some(
                                f32::$BINARY_FUNC(*a, *b),
                            )))),
                            _ => internal_err!(
                                "Unexpected scalar types for function {}: {:?}, {:?}",
                                self.name(),
                                left.data_type(),
                                right.data_type()
                            ),
                        };
                    }

                    // General path: materialize both inputs as arrays and apply
                    // the function element-wise, dispatching on the first
                    // argument's element type.
                    let arrays = ColumnarValue::values_to_arrays(&[lhs, rhs])?;
                    let output = match arrays[0].data_type() {
                        DataType::Float64 => {
                            let left = arrays[0].as_primitive::<Float64Type>();
                            let right = arrays[1].as_primitive::<Float64Type>();
                            Arc::new(arrow::compute::binary::<_, _, _, Float64Type>(
                                left,
                                right,
                                |a, b| f64::$BINARY_FUNC(a, b),
                            )?) as ArrayRef
                        }
                        DataType::Float32 => {
                            let left = arrays[0].as_primitive::<Float32Type>();
                            let right = arrays[1].as_primitive::<Float32Type>();
                            Arc::new(arrow::compute::binary::<_, _, _, Float32Type>(
                                left,
                                right,
                                |a, b| f32::$BINARY_FUNC(a, b),
                            )?) as ArrayRef
                        }
                        other => {
                            return internal_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
datafusion_functions/macros.rs

datafusion_functions/
macros.rs