datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates convenience functions that build an `Expr` calling a scalar function.
///
/// The input is a comma-separated list of `(name, doc, args)` entries; a
/// trailing comma after the last entry is accepted. For every entry one
/// `pub fn name(..) -> Expr` is emitted that forwards to `super::name()`, so
/// the macro has to be invoked inside a child module (conventionally
/// `expr_fn`) of the module that provides those `name()` accessors.
///
/// For example, an `expr_fn` module containing
/// `export_functions!((encode, "Return encode(arg)", args,));` is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<Expr>) -> Expr {
///         super::encode().call(args)
///     }
/// }
/// ```
///
/// This macro only generates the wrappers shown above. It does not generate a
/// package-level `functions()` list; that has to be written separately.
///
/// The `args` part of an entry selects the shape of the wrapper:
/// - `name,` (a single identifier followed by a comma): the wrapper takes one
///   `Vec<Expr>` parameter
/// - `a b c` (zero or more identifiers, must be without commas): the wrapper
///   takes one `Expr` parameter per identifier
/// - either form prefixed with `@config`: the wrapped function requires config
///
/// Note on configuration construction paths:
/// - The convenience wrappers generated for `@config` functions call the inner
///   constructor with `ConfigOptions::default()`. These wrappers are intended
///   primarily for programmatic `Expr` construction and convenience usage.
/// - When functions are registered in a session, DataFusion will call
///   `with_updated_config()` to create a `ScalarUDF` instance using the session's
///   actual `ConfigOptions`. This also happens when configuration changes at runtime
///   (e.g., via `SET` statements). In short: the macro uses the default config for
///   convenience constructors; the session config is applied when functions are
///   registered or when configuration is updated.
#[macro_export]
macro_rules! export_functions {
    // Entry point: a list of `(name, doc, args...)` entries, with an optional
    // trailing comma. Each entry is re-dispatched to one of the `single` arms.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config) and takes no arguments
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // function that requires config and takes a vector argument
    // (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call($arg)
        }
    };

    // function that requires config and variadic arguments
    // (identifiers without commas, one `Expr` parameter each)
    (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
107
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton. Optionally use a custom constructor
/// `$CTOR` which defaults to `<$UDF>::new` if not specified.
///
/// `$CTOR` is an expression that is called with no arguments (`($CTOR)()`);
/// its result is passed to `ScalarUDF::new_from_impl`. In the three-argument
/// form `$UDF` itself is not used by the expansion.
///
/// The singleton lives in a `LazyLock`, so the constructor runs at most once
/// (on the first call of `$NAME()`); every call returns a clone of the same `Arc`.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident, $CTOR:expr) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Singleton instance of the function
            static INSTANCE: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    ($CTOR)(),
                ))
            });
            std::sync::Arc::clone(&INSTANCE)
        }
    };
    ($UDF:ty, $NAME:ident) => {
        // `$crate::` keeps the recursive call resolvable when this exported
        // macro is invoked by path from another crate.
        $crate::make_udf_function!($UDF, $NAME, <$UDF>::new);
    };
}
133
/// Creates a function named `$NAME` which builds a `ScalarUDF` of the `$UDF`
/// function from a configuration.
///
/// The generated function takes `config: &ConfigOptions` and passes it to
/// `<$UDF>::new_with_config`. Unlike `make_udf_function!`, nothing is cached:
/// every call constructs a new `$UDF` and wraps it in a new `Arc<ScalarUDF>`.
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // `config` is already a reference; forward it as-is.
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
148
/// Emits a placeholder module for a function package whose feature is disabled.
///
/// `make_stub_package!(name, "feature")` expands to a `pub mod name` that is
/// only compiled when the Cargo feature `"feature"` is NOT enabled. The module
/// contains a single `functions()` that logs a debug message and returns an
/// empty list.
///
/// The rationale for providing stub functions is to help users to configure datafusion
/// properly (so they get an error telling them why a function is not available)
/// instead of getting a cryptic "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
171
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is
/// accepted, including generic or path-qualified ones
///
/// The expansion ends with `?`, so the macro can only be used inside a
/// function whose return type can absorb the error. On success it evaluates
/// to a reference to the downcast array.
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
190
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
///
/// This forwards to `downcast_named_arg!`, using the source text of `$ARG`
/// as the argument name so that the error message identifies what failed to
/// cast. Like that macro, the expansion ends with `?`.
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
200
/// Macro to create a unary math UDF.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// function `$NAME()` that returns the corresponding `ScalarUDF`.
///
/// The generated function accepts exactly one argument of type Float64 or
/// Float32 and applies `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC` to every
/// value. The reported return type is Float32 for a Float32 argument and
/// Float64 otherwise.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function;
/// called with the `&[ExprProperties]` of the input
/// $EVALUATE_BOUNDS: the bounds calculation method of the function; called
/// with the `&[&Interval]` of the input
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{Result, exec_err};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF with a one-argument Float64/Float32 signature.
                pub fn new() -> Self {
                    let accepted = vec![DataType::Float64, DataType::Float32];
                    let signature =
                        Signature::uniform(1, accepted, Volatility::Immutable);
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; everything else
                    // (possible values float64/null/int) maps to Float64
                    if matches!(arg_types[0], DataType::Float32) {
                        Ok(DataType::Float32)
                    } else {
                        Ok(DataType::Float64)
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let operand = &arrays[0];

                    // Dispatch on the physical type of the single argument.
                    let computed = match operand.data_type() {
                        DataType::Float32 => {
                            let values = operand.as_primitive::<Float32Type>();
                            Arc::new(
                                values.unary::<_, Float32Type>(|v: f32| {
                                    f32::$UNARY_FUNC(v)
                                }),
                            ) as ArrayRef
                        }
                        DataType::Float64 => {
                            let values = operand.as_primitive::<Float64Type>();
                            Arc::new(
                                values.unary::<_, Float64Type>(|v: f64| {
                                    f64::$UNARY_FUNC(v)
                                }),
                            ) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(computed))
                }
            }
        }
    };
}
314
/// Macro to create a binary math UDF.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` with its `ScalarUDFImpl`, plus (through `make_udf_function!`) a
/// function `$NAME()` that returns the corresponding `ScalarUDF`.
///
/// The generated function accepts two arguments that are either both Float32
/// or both Float64 and applies `f32::$BINARY_FUNC` / `f64::$BINARY_FUNC` to
/// each pair of values (first argument, second argument). The reported return
/// type is Float32 when the first argument is Float32 and Float64 otherwise.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $BINARY_FUNC: the binary function to apply to the arguments
/// $OUTPUT_ORDERING: the output ordering calculation method of the function;
/// called with the `&[ExprProperties]` of the input
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{Result, exec_err};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF accepting (Float32, Float32) or (Float64, Float64).
                pub fn new() -> Self {
                    let single_precision =
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]);
                    let double_precision =
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]);
                    let signature = Signature::one_of(
                        vec![single_precision, double_precision],
                        Volatility::Immutable,
                    );
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; everything else
                    // (possible values float64/null/int) maps to Float64
                    if matches!(arg_types[0], DataType::Float32) {
                        Ok(DataType::Float32)
                    } else {
                        Ok(DataType::Float64)
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;

                    // The type of the first argument selects the kernel; the
                    // second argument is read as the same primitive type.
                    let computed: ArrayRef = match arrays[0].data_type() {
                        DataType::Float32 => {
                            let lhs = arrays[0].as_primitive::<Float32Type>();
                            let rhs = arrays[1].as_primitive::<Float32Type>();
                            let values =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(values) as _
                        }
                        DataType::Float64 => {
                            let lhs = arrays[0].as_primitive::<Float64Type>();
                            let rhs = arrays[1].as_primitive::<Float64Type>();
                            let values =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(values) as _
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(computed))
                }
            }
        }
    };
}