datafusion_functions/
macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates public `Expr`-building wrapper functions for a list of UDFs.
///
/// Each entry has the form `(name, doc, args...)` and expands to a
/// `pub fn name(..)` carrying `doc` as its documentation and forwarding its
/// arguments to `super::name().call(..)`. Because of the `super::` path the
/// macro has to be invoked inside a child module (conventionally `expr_fn`)
/// of the module that defines the `name()` accessors:
///
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(input, encoding)", input encoding),
///         (coalesce, "Return the first non-null argument", args,)
///     );
/// }
/// ```
///
/// Equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(input, encoding)
///     pub fn encode(input: Expr, encoding: Expr) -> Expr {
///         super::encode().call(vec![input, encoding])
///     }
///     /// Return the first non-null argument
///     pub fn coalesce(args: Vec<Expr>) -> Expr {
///         super::coalesce().call(args)
///     }
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
///
/// A trailing comma after the last `(name, doc, args...)` entry is allowed.
#[macro_export]
macro_rules! export_functions {
    // entry point: comma separated list of `(name, doc, args...)` entries
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below; `$crate::` keeps the
            // recursion resolvable when the macro is invoked by path from
            // another crate
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
68
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton.
///
/// The instance is built through `<$UDF>::new()` inside a `LazyLock`, and
/// every call to `$NAME()` hands out a clone of the same `Arc`, so the
/// `ScalarUDF` is only ever created once.
///
/// $UDF: the type implementing the function; must provide an associated `new()`
/// $NAME: the name of the generated accessor function
///
/// Standard library items are referenced through absolute `::std` paths so the
/// expansion keeps working when the calling module has its own item named `std`.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> ::std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Singleton instance of the function
            static INSTANCE: ::std::sync::LazyLock<
                ::std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = ::std::sync::LazyLock::new(|| {
                ::std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    <$UDF>::new(),
                ))
            });
            // Cheap reference-count bump; the underlying value is shared
            ::std::sync::Arc::clone(&INSTANCE)
        }
    };
}
90
/// Generates a stub module named `$name` that is compiled only when the
/// cargo feature `$feature` is NOT enabled.
///
/// The stub exposes a `functions()` that logs a debug message and returns an
/// empty list, so code that collects the functions of every package still
/// builds and runs when a feature is switched off. The generated module
/// documentation names the feature flag that has to be enabled to get the
/// real implementation.
///
/// $name: the name of the generated module
/// $feature: the feature flag (string literal) guarding the real module
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        #[cfg(not(feature = $feature))]
        pub mod $name {
            use std::sync::Arc;

            use datafusion_expr::ScalarUDF;
            use log::debug;

            /// Stand-in for the disabled package: contributes no functions
            pub fn functions() -> Vec<Arc<ScalarUDF>> {
                debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
113
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is accepted,
/// including paths and generic types
///
/// The expansion ends in `?`, so it must be used inside a function that returns
/// a compatible `Result`. It invokes `internal_datafusion_err!` unqualified, so
/// that macro has to be in scope at the call site.
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                ::std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}

/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// Shorthand for `downcast_named_arg!` with an empty argument name; the same
/// call-site requirements apply.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ty) => {{
        // `$crate::` so callers only need to name `downcast_arg!` itself
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
144
/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// The macro generates a private module named `$NAME` containing the `$UDF`
/// struct and its `ScalarUDFImpl`, plus (via `make_udf_function!`) an accessor
/// function `$NAME()` returning the singleton `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument; it is invoked as
///   `f64::$UNARY_FUNC(x)` and `f32::$UNARY_FUNC(x)`, so it must exist on both types
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the interval bounds calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a one-argument Float64/Float32 signature
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::uniform(
                            1,
                            vec![Float64, Float32],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        // Report a missing argument as an error instead of panicking
                        None => exec_err!(
                            "Function {} expects 1 argument, got none",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    // Checked access: a missing argument becomes an error, not a panic
                    let Some(input) = arrays.first() else {
                        return exec_err!(
                            "Function {} expects 1 argument, got none",
                            self.name()
                        );
                    };
                    let arr: ArrayRef = match input.data_type() {
                        DataType::Float64 => Arc::new(
                            input
                                .as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|x: f64| f64::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        DataType::Float32 => Arc::new(
                            input
                                .as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|x: f32| f32::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
258
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments of types Float32 or Float64,
/// applies a binary floating function to the arguments, and returns a value of the same type.
///
/// The macro generates a private module named `$NAME` containing the `$UDF`
/// struct and its `ScalarUDFImpl`, plus (via `make_udf_function!`) an accessor
/// function `$NAME()` returning the singleton `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the arguments; it is invoked as
///   `f64::$BINARY_FUNC(y, x)` and `f32::$BINARY_FUNC(y, x)`, so it must exist on both types
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF accepting either two Float32 or two Float64 arguments
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::one_of(
                            vec![
                                TypeSignature::Exact(vec![Float32, Float32]),
                                TypeSignature::Exact(vec![Float64, Float64]),
                            ],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // The result type follows the type of the first argument
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        // Report missing arguments as an error instead of panicking
                        None => exec_err!(
                            "Function {} expects 2 arguments, got none",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    // Checked access: missing arguments become an error, not a panic
                    let (Some(lhs), Some(rhs)) = (arrays.first(), arrays.get(1)) else {
                        return exec_err!(
                            "Function {} expects 2 arguments, got {}",
                            self.name(),
                            arrays.len()
                        );
                    };
                    // Dispatch on the type of the first argument
                    let arr: ArrayRef = match lhs.data_type() {
                        DataType::Float64 => {
                            let y = lhs.as_primitive::<Float64Type>();
                            let x = rhs.as_primitive::<Float64Type>();
                            let result = arrow::compute::binary::<_, _, _, Float64Type>(
                                y,
                                x,
                                |y, x| f64::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        DataType::Float32 => {
                            let y = lhs.as_primitive::<Float32Type>();
                            let x = rhs.as_primitive::<Float32Type>();
                            let result = arrow::compute::binary::<_, _, _, Float32Type>(
                                y,
                                x,
                                |y, x| f32::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
379}