datafusion_functions/
macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates public `Expr`-building wrapper functions for a list of UDFs.
///
/// The macro only emits the wrapper functions themselves. It is meant to be
/// invoked inside a module (conventionally `pub mod expr_fn`) whose parent
/// module defines, for every `$FUNC`, a function `$FUNC()` returning a value
/// with a `call(Vec<Expr>)` method: each wrapper forwards its arguments to
/// `super::$FUNC().call(..)`.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(args)", args,),
///         (decode, "Return decode(input, encoding)", input encoding)
///     );
/// }
/// ```
///
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(args)
///     pub fn encode(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
///         super::encode().call(args)
///     }
///
///     /// Return decode(input, encoding)
///     pub fn decode(
///         input: datafusion_expr::Expr,
///         encoding: datafusion_expr::Expr,
///     ) -> datafusion_expr::Expr {
///         super::decode().call(vec![input, encoding])
///     }
/// }
/// ```
///
/// Each entry is a parenthesized tuple `($FUNC, $DOC, <args>)`; tuples are
/// separated by commas (no trailing comma after the last tuple).
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
#[macro_export]
macro_rules! export_functions {
    // Entry point: split the list into one `single ...` invocation per tuple.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => {
        $(
            // switch to single-function cases below
            //
            // `$crate::` makes the recursive call resolve to this macro even
            // when the caller invoked it by path and never imported it by name.
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
68
/// Defines a public function `$NAME()` that hands out a shared `ScalarUDF`
/// built from `<$UDF>::new()`.
///
/// The `ScalarUDF` lives in a function-local `LazyLock` static, so it is
/// constructed lazily on first use and at most once; every call returns
/// another `Arc` handle to that same instance.
///
/// * `$UDF`: type of the UDF implementation; it must offer `new()` and be
///   accepted by `ScalarUDF::new_from_impl`
/// * `$NAME`: name of the generated accessor function
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Process-wide singleton; the closure runs only on first access.
            static INSTANCE: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                let implementation = <$UDF>::new();
                let udf = datafusion_expr::ScalarUDF::new_from_impl(implementation);
                std::sync::Arc::new(udf)
            });

            // Dereferencing forces initialization; cloning only bumps the refcount.
            let shared = &*INSTANCE;
            std::sync::Arc::clone(shared)
        }
    };
}
91
/// Creates a stub module for a function package whose feature is not enabled.
///
/// The generated `pub mod $name` is compiled only when the cargo feature
/// `$feature` is NOT enabled. It exposes a single `functions()` that logs a
/// debug message and returns an empty list, and the module's documentation
/// names the feature flag that enables the real package.
///
/// The stated rationale for providing stubs is to help users configure
/// datafusion properly (so they learn why a function is not available)
/// instead of getting a cryptic "no function found" message at runtime.
///
/// * `$name`: name of the module to generate
/// * `$feature`: string literal naming the cargo feature that guards the package
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
114
/// Downcasts an argument to a concrete array type, producing an internal
/// error that mentions the argument's name when the downcast fails.
///
/// The expansion is a block expression ending in `?`: on success it evaluates
/// to a reference to the downcast value, on failure it returns early from the
/// enclosing function. `internal_datafusion_err!` is invoked unqualified, so
/// it has to be resolvable wherever this macro is used.
///
/// * `$array`: value exposing `as_any()` (e.g. an `ArrayRef`)
/// * `$arg_name`: name of the argument (for error messages)
/// * `$array_type`: identifier of the array type to cast the argument to
#[macro_export]
macro_rules! downcast_named_arg {
    ($array:expr, $arg_name:expr, $array_type:ident) => {{
        // Kept as a single expression so temporaries created by `$array`
        // live exactly as long as the resulting reference needs them to.
        $array
            .as_any()
            .downcast_ref::<$array_type>()
            .ok_or_else(|| {
                internal_datafusion_err!(
                    "could not cast {} to {}",
                    $arg_name,
                    std::any::type_name::<$array_type>()
                )
            })?
    }};
}
133
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This is `downcast_named_arg!` with an empty argument name, so the error
/// message carries only the target type. Like that macro, the expansion ends
/// in `?` and therefore returns early from the enclosing function on failure.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        // `$crate::` lets callers use `downcast_arg!` without also having to
        // bring `downcast_named_arg!` into scope themselves.
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
145
/// Macro to create a unary math UDF.
///
/// A unary math function takes one argument of type Float32 or Float64,
/// applies a unary floating point function to every value, and returns an
/// array of the same floating point type.
///
/// The expansion consists of a private module `$NAME` holding the struct
/// `$UDF` with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// public function `$NAME()` returning the shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the generated module name)
/// $UNARY_FUNC: the unary function to apply, called as `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC`
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the interval bounds calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::{any::Any, sync::Arc};

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::{
                interval_arithmetic::Interval,
                sort_properties::{ExprProperties, SortProperties},
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF with a one-argument Float64/Float32 signature.
                pub fn new() -> Self {
                    let accepted_types = vec![DataType::Float64, DataType::Float32];
                    Self {
                        signature: Signature::uniform(
                            1,
                            accepted_types,
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input keeps its width; every other input type
                    // (possible values float64/null/int) maps to Float64.
                    let output_type = match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    // All inputs are materialized as arrays, so the result is
                    // always returned as an array as well.
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    let output: ArrayRef = match input.data_type() {
                        DataType::Float64 => {
                            let values = input.as_primitive::<Float64Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        DataType::Float32 => {
                            let values = input.as_primitive::<Float32Type>();
                            Arc::new(
                                values
                                    .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v)),
                            ) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
259
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments that are both Float32 or both
/// Float64, applies a binary floating point function element-wise, and
/// returns an array of the same floating point type.
///
/// The expansion consists of a private module `$NAME` holding the struct
/// `$UDF` with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// public function `$NAME()` returning the shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used as the generated module name)
/// $BINARY_FUNC: the binary function to apply, called as
///   `f64::$BINARY_FUNC(first, second)` / `f32::$BINARY_FUNC(first, second)`
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::{any::Any, sync::Arc};

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::{
                sort_properties::{ExprProperties, SortProperties},
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, TypeSignature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF accepting exactly (Float32, Float32) or (Float64, Float64).
                pub fn new() -> Self {
                    let accepted = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    Self {
                        signature: Signature::one_of(accepted, Volatility::Immutable),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Only the first argument is inspected: Float32 keeps its
                    // width; every other type (possible values float64/null/int)
                    // maps to Float64.
                    let output_type = match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    // All inputs are materialized as arrays, so the result is
                    // always returned as an array as well.
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let first = &arrays[0];

                    // The kernel is chosen from the first argument's type; the
                    // second argument is only touched inside a float branch.
                    let output: ArrayRef = match first.data_type() {
                        DataType::Float64 => {
                            let lhs = first.as_primitive::<Float64Type>();
                            let rhs = arrays[1].as_primitive::<Float64Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        DataType::Float32 => {
                            let lhs = first.as_primitive::<Float32Type>();
                            let rhs = arrays[1].as_primitive::<Float32Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}