datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates `Expr`-building convenience wrappers for a list of scalar functions.
///
/// Every entry has the form `(name, doc, args)`. For each entry the macro emits one
/// `pub fn name(...) -> Expr` that calls `super::name(...)` to obtain the function
/// and then `.call(...)` on it with the wrapper's arguments. Only these wrappers
/// are generated: the surrounding module, and any `functions()` list of all
/// functions in the package, are written by the caller. Because the wrappers
/// resolve the function through `super::`, the macro must be invoked in a child
/// module (conventionally `expr_fn`) of the module that defines the functions.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(arg)", args,),
///         (decode, "Return decode(input, encoding)", input encoding),
///     );
/// }
/// ```
///
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<Expr>) -> Expr {
///         super::encode().call(args)
///     }
///     /// Return decode(input, encoding)
///     pub fn decode(input: Expr, encoding: Expr) -> Expr {
///         super::decode().call(vec![input, encoding])
///     }
/// }
/// ```
///
/// Entries are separated by commas; a trailing comma after the last entry is
/// accepted.
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
/// - Functions that require config (marked with `@config` prefix)
///
/// Note on configuration construction paths:
/// - The convenience wrappers generated for `@config` functions call the inner
///   constructor with `ConfigOptions::default()`. These wrappers are intended
///   primarily for programmatic `Expr` construction and convenience usage.
/// - When functions are registered in a session, DataFusion will call
///   `with_updated_config()` to create a `ScalarUDF` instance using the session's
///   actual `ConfigOptions`. This also happens when configuration changes at runtime
///   (e.g., via `SET` statements). In short: the macro uses the default config for
///   convenience constructors; the session config is applied when functions are
///   registered or when configuration is updated.
#[macro_export]
macro_rules! export_functions {
    // Entry point: a comma-separated list of `(name, doc, args)` entries, with an
    // optional trailing comma.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config) and takes no arguments
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // function that requires config and takes a vector argument
    // (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call($arg)
        }
    };

    // function that requires config and variadic arguments
    (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
107
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton.
///
/// - `$UDF`: the type whose value is wrapped with `ScalarUDF::new_from_impl`
/// - `$NAME`: the name of the generated `pub fn $NAME() -> Arc<ScalarUDF>`
/// - `$CTOR` (optional): an expression that is called with no arguments to build
///   the `$UDF` value; defaults to `<$UDF>::new` if not specified
///
/// The instance lives in a function-local `static` `LazyLock`, so it is built on
/// first use and every call returns a clone of the same `Arc`. This is used to
/// ensure creating the list of `ScalarUDF` only happens once.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident, $CTOR:expr) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Singleton instance of the function
            static INSTANCE: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    ($CTOR)(),
                ))
            });
            std::sync::Arc::clone(&INSTANCE)
        }
    };
    ($UDF:ty, $NAME:ident) => {
        // Recurse through `$crate::` so the macro also resolves when it is invoked
        // by path from another crate without being imported by name.
        $crate::make_udf_function!($UDF, $NAME, <$UDF>::new);
    };
}
133
/// Creates a function named `$NAME` that builds a `ScalarUDF` of the `$UDF`
/// function from a configuration.
///
/// - `$UDF`: the type providing `new_with_config`, whose result is wrapped with
///   `ScalarUDF::new_from_impl`
/// - `$NAME`: the name of the generated
///   `pub fn $NAME(config: &ConfigOptions) -> Arc<ScalarUDF>`
///
/// Unlike [`make_udf_function`], no instance is cached: every call constructs a
/// new `ScalarUDF` from the given `config` and returns it in a fresh `Arc`.
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                // `config` is already a reference; pass it through unchanged
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
148
/// Defines a stub module `$name` that is compiled only when the cargo feature
/// `$feature` is NOT enabled.
///
/// The stub exposes a single `functions()` that logs a debug message naming the
/// disabled package and returns an empty list, so the package's entry point exists
/// whether or not the feature is turned on. The module's documentation tells users
/// which feature flag enables the real package.
///
/// $name: name of the module to generate
/// $feature: string literal naming the feature flag that guards the real package
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
171
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// The expansion ends in `?`, so the macro can only be used inside a function
/// whose return type can carry the error produced by `internal_datafusion_err!`.
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is accepted,
/// including path-qualified or generic ones
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
190
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// Delegates to `downcast_named_arg!`, using the source text of `$ARG` as the
/// argument name so the error message identifies which expression failed to cast.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: identifier naming the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
200
/// Macro to create a unary math UDF.
///
/// The generated function accepts one Float32 or Float64 argument, applies the
/// floating point method `$UNARY_FUNC` to every value, and returns an array of
/// the same float type.
///
/// The expansion consists of a private module `$NAME` containing the struct `$UDF`
/// with its `ScalarUDFImpl`, plus an accessor function `$NAME()` created through
/// `make_udf_function!`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the interval bounds calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {

            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{Result, exec_err};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF with a one-argument Float64/Float32 signature.
                pub fn new() -> Self {
                    let signature = Signature::uniform(
                        1,
                        vec![DataType::Float64, DataType::Float32],
                        Volatility::Immutable,
                    );
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input keeps its type; every other input type
                    // (possible values float64/null/int) is reported as Float64.
                    let output_type = match arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    props: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(props)
                }

                fn evaluate_bounds(&self, intervals: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(intervals)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let values = &arrays[0];

                    // Dispatch on the physical float type of the first argument.
                    let output = match values.data_type() {
                        DataType::Float64 => {
                            let mapped = values
                                .as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v));
                            Arc::new(mapped) as ArrayRef
                        }
                        DataType::Float32 => {
                            let mapped = values
                                .as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v));
                            Arc::new(mapped) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
311
/// Macro to create a binary math UDF.
///
/// The generated function accepts two arguments that are both Float32 or both
/// Float64, applies the floating point method `$BINARY_FUNC` to each pair of
/// values, and returns a value of the same float type.
///
/// The expansion consists of a private module `$NAME` containing the struct `$UDF`
/// with its `ScalarUDFImpl`, plus an accessor function `$NAME()` created through
/// `make_udf_function!`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the arguments
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {

            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::utils::take_function_args;
            use datafusion_common::{Result, ScalarValue, internal_err};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Builds the UDF accepting either (Float32, Float32) or
                /// (Float64, Float64).
                pub fn new() -> Self {
                    let accepted = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    Self {
                        signature: Signature::one_of(accepted, Volatility::Immutable),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // The first argument decides: Float32 stays Float32, every
                    // other type (possible values float64/null/int) gives Float64.
                    let output_type = match arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    props: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(props)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let ScalarFunctionArgs {
                        args: raw_args,
                        return_field,
                        ..
                    } = args;
                    let [lhs, rhs] = take_function_args(self.name(), raw_args)?;

                    // Fast path: both inputs are scalars, so the result is a scalar.
                    if let (
                        ColumnarValue::Scalar(lhs_scalar),
                        ColumnarValue::Scalar(rhs_scalar),
                    ) = (&lhs, &rhs)
                    {
                        // A null on either side yields a null of the return type.
                        if lhs_scalar.is_null() || rhs_scalar.is_null() {
                            return ColumnarValue::Scalar(ScalarValue::Null)
                                .cast_to(return_field.data_type(), None);
                        }

                        return match (lhs_scalar, rhs_scalar) {
                            (
                                ScalarValue::Float64(Some(a)),
                                ScalarValue::Float64(Some(b)),
                            ) => {
                                let value = f64::$BINARY_FUNC(*a, *b);
                                Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(
                                    value,
                                ))))
                            }
                            (
                                ScalarValue::Float32(Some(a)),
                                ScalarValue::Float32(Some(b)),
                            ) => {
                                let value = f32::$BINARY_FUNC(*a, *b);
                                Ok(ColumnarValue::Scalar(ScalarValue::Float32(Some(
                                    value,
                                ))))
                            }
                            _ => internal_err!(
                                "Unexpected scalar types for function {}: {:?}, {:?}",
                                self.name(),
                                lhs_scalar.data_type(),
                                rhs_scalar.data_type()
                            ),
                        };
                    }

                    // General path: at least one input is an array; expand both to
                    // arrays and dispatch on the type of the first one.
                    let arrays = ColumnarValue::values_to_arrays(&[lhs, rhs])?;
                    let output: ArrayRef = match arrays[0].data_type() {
                        DataType::Float64 => {
                            let left = arrays[0].as_primitive::<Float64Type>();
                            let right = arrays[1].as_primitive::<Float64Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    left,
                                    right,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as _
                        }
                        DataType::Float32 => {
                            let left = arrays[0].as_primitive::<Float32Type>();
                            let right = arrays[1].as_primitive::<Float32Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    left,
                                    right,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as _
                        }
                        other => {
                            return internal_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            );
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}