datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates `Expr`-building convenience functions for a list of UDFs.
///
/// The macro takes a comma separated list of `(name, doc, args...)` entries
/// (a trailing comma after the last entry is accepted) and emits one
/// `pub fn name(..) -> datafusion_expr::Expr` per entry, documented with `doc`.
///
/// Every generated function calls `super::name()` and then `.call(..)` on the
/// result, so the macro must be invoked inside a child module (conventionally
/// `expr_fn`) of the module that defines the `name()` accessor. The macro
/// itself emits neither the surrounding module nor a function listing all
/// UDFs; those are written by the caller.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(arg)", input encoding),
///         (decode, "Return decode(arg)", args,),
///     );
/// }
/// ```
/// expands to roughly
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(input: Expr, encoding: Expr) -> Expr {
///         super::encode().call(vec![input, encoding])
///     }
///     /// Return decode(arg)
///     pub fn decode(args: Vec<Expr>) -> Expr {
///         super::decode().call(args)
///     }
/// }
/// ```
///
/// The `args...` part of an entry selects the shape of the generated function:
/// - `Vec<Expr>` argument (single argument name followed by a comma)
/// - Variable number of `Expr` arguments (zero or more argument names, must be without commas)
/// - Functions that require config (either of the above, marked with a `@config` prefix)
///
/// Note on configuration construction paths:
/// - The convenience wrappers generated for `@config` functions call the inner
///   constructor with `ConfigOptions::default()`. These wrappers are intended
///   primarily for programmatic `Expr` construction and convenience usage.
/// - When functions are registered in a session, DataFusion will call
///   `with_updated_config()` to create a `ScalarUDF` instance using the session's
///   actual `ConfigOptions`. This also happens when configuration changes at runtime
///   (e.g., via `SET` statements). In short: the macro uses the default config for
///   convenience constructors; the session config is applied when functions are
///   registered or when configuration is updated.
#[macro_export]
macro_rules! export_functions {
    // Entry point: a list of `(name, doc, args...)` entries. The optional
    // trailing comma lets multi-line invocations end every entry with `,`.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config) and takes no arguments
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // function that requires config and takes a vector argument
    (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call($arg)
        }
    };

    // function that requires config and variadic arguments
    (single $FUNC:ident, $DOC:expr, @config $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![$($arg),*])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
107
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton. Optionally use a custom constructor
/// `$CTOR` which defaults to `$UDF::new()` if not specified.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
///
/// - `$UDF`: type passed to `ScalarUDF::new_from_impl`
/// - `$NAME`: name of the generated `pub fn`, which returns a clone of the
///   shared `Arc<ScalarUDF>`
/// - `$CTOR`: callable taking no arguments that builds the `$UDF` value; it is
///   invoked at most once, the first time `$NAME()` is called
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident, $CTOR:expr) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Singleton instance of the function, built lazily on first use
            static INSTANCE: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                    ($CTOR)(),
                ))
            });
            std::sync::Arc::clone(&INSTANCE)
        }
    };
    ($UDF:ty, $NAME:ident) => {
        // `$crate::` keeps the recursive call resolvable when the macro is
        // invoked by path from a crate that has not imported it by name.
        $crate::make_udf_function!($UDF, $NAME, <$UDF>::new);
    };
}
133
/// Creates a function named `$NAME` that builds a `ScalarUDF` of the `$UDF`
/// function from a configuration.
///
/// The generated function takes a `&datafusion_common::config::ConfigOptions`,
/// passes it to `$UDF::new_with_config` and wraps the result in a new
/// `Arc<ScalarUDF>`. Unlike `make_udf_function!`, nothing is cached: every call
/// constructs a fresh instance from the configuration it is given.
///
/// - `$UDF`: type providing `new_with_config(&ConfigOptions)`
/// - `$NAME`: name of the generated `pub fn`
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // `config` is already a reference, so it is forwarded as-is.
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
148
/// Emits a placeholder module `$name` that only exists while the cargo feature
/// `$feature` is turned off.
///
/// The placeholder exposes a `functions()` that logs (at debug level) that the
/// package is disabled and returns an empty list. The point of the stub is to
/// guide users towards configuring datafusion properly (the generated docs
/// name the feature flag to enable) rather than leaving them with a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
171
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// The expansion calls `$ARG.as_any().downcast_ref::<$ARRAY_TYPE>()` and applies
/// `?` to the outcome, so it must be used inside a function or closure whose
/// return type can carry the error built by
/// `datafusion_common::internal_datafusion_err!`. On success the expression
/// evaluates to a reference to the downcast value.
///
/// $ARG: ArrayRef (anything providing `as_any()`)
/// $NAME: name of the argument (for error messages); only evaluated on failure
/// $ARRAY_TYPE: the type of array to cast the argument to. Any type is
/// accepted, including generic or path-qualified ones such as
/// `PrimitiveArray<Float64Type>`.
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
190
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// This forwards to `downcast_named_arg!`, using the source text of `$ARG` as
/// the argument name so that the error message identifies which expression
/// could not be cast.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
200
/// Generates a scalar UDF that applies a single floating point method to each
/// value of its one argument.
///
/// The expansion consists of a private module named `$NAME` that holds the
/// `$UDF` struct and its `ScalarUDFImpl`, plus (through `make_udf_function!`)
/// an accessor function `$NAME()` returning the UDF.
///
/// The generated UDF declares a one-argument signature over `Float64` and
/// `Float32`, evaluates `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC` on the input and
/// reports `Float32` as return type for `Float32` input and `Float64` otherwise.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used for the module and the accessor)
/// $UNARY_FUNC: the name of the `f32`/`f64` method to apply to the argument
/// $OUTPUT_ORDERING: callable given the input `ExprProperties` to compute the output ordering
/// $EVALUATE_BOUNDS: callable given the input intervals to compute the output interval
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{Result, exec_err};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                pub fn new() -> Self {
                    // One argument, either Float64 or Float32.
                    let accepted = vec![DataType::Float64, DataType::Float32];
                    let signature =
                        Signature::uniform(1, accepted, Volatility::Immutable);
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 keeps its width; any other type
                    // (possible values float64/null/int) maps to Float64.
                    let is_single_precision =
                        matches!(&arg_types[0], DataType::Float32);
                    Ok(if is_single_precision {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    })
                }

                fn output_ordering(
                    &self,
                    properties: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(properties)
                }

                fn evaluate_bounds(&self, intervals: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(intervals)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    // Dispatch on the physical float width of the input.
                    let output = match input.data_type() {
                        DataType::Float64 => {
                            let computed = input
                                .as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v));
                            Arc::new(computed) as ArrayRef
                        }
                        DataType::Float32 => {
                            let computed = input
                                .as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v));
                            Arc::new(computed) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
314
/// Generates a scalar UDF that applies a two-argument floating point method.
///
/// The expansion consists of a private module named `$NAME` that holds the
/// `$UDF` struct and its `ScalarUDFImpl`, plus (through `make_udf_function!`)
/// an accessor function `$NAME()` returning the UDF.
///
/// The generated UDF accepts exactly `(Float32, Float32)` or
/// `(Float64, Float64)` and evaluates `f32::$BINARY_FUNC` / `f64::$BINARY_FUNC`
/// with the first argument as receiver and the second as parameter. Two scalar
/// inputs produce a scalar result; any other combination is evaluated as arrays.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also used for the module and the accessor)
/// $BINARY_FUNC: the name of the `f32`/`f64` method to apply to the arguments
/// $OUTPUT_ORDERING: callable given the input `ExprProperties` to compute the output ordering
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::utils::take_function_args;
            use datafusion_common::{Result, ScalarValue, internal_err};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                pub fn new() -> Self {
                    // Both arguments must share the same float width.
                    let single = TypeSignature::Exact(vec![
                        DataType::Float32,
                        DataType::Float32,
                    ]);
                    let double = TypeSignature::Exact(vec![
                        DataType::Float64,
                        DataType::Float64,
                    ]);
                    let signature =
                        Signature::one_of(vec![single, double], Volatility::Immutable);
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 keeps its width; any other type
                    // (possible values float64/null/int) maps to Float64.
                    let is_single_precision =
                        matches!(&arg_types[0], DataType::Float32);
                    Ok(if is_single_precision {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    })
                }

                fn output_ordering(
                    &self,
                    properties: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(properties)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let ScalarFunctionArgs {
                        args: operands,
                        return_field,
                        ..
                    } = args;
                    let [first, second] = take_function_args(self.name(), operands)?;

                    match (first, second) {
                        // Scalar inputs are computed directly and stay scalar.
                        (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
                            // A null on either side yields a null of the return type.
                            if lhs.is_null() || rhs.is_null() {
                                return ColumnarValue::Scalar(ScalarValue::Null)
                                    .cast_to(return_field.data_type(), None);
                            }

                            match (&lhs, &rhs) {
                                (
                                    ScalarValue::Float64(Some(a)),
                                    ScalarValue::Float64(Some(b)),
                                ) => {
                                    let value = f64::$BINARY_FUNC(*a, *b);
                                    Ok(ColumnarValue::Scalar(ScalarValue::Float64(
                                        Some(value),
                                    )))
                                }
                                (
                                    ScalarValue::Float32(Some(a)),
                                    ScalarValue::Float32(Some(b)),
                                ) => {
                                    let value = f32::$BINARY_FUNC(*a, *b);
                                    Ok(ColumnarValue::Scalar(ScalarValue::Float32(
                                        Some(value),
                                    )))
                                }
                                _ => internal_err!(
                                    "Unexpected scalar types for function {}: {:?}, {:?}",
                                    self.name(),
                                    lhs.data_type(),
                                    rhs.data_type()
                                ),
                            }
                        }
                        // At least one side is an array: evaluate element-wise.
                        (lhs, rhs) => {
                            let arrays = ColumnarValue::values_to_arrays(&[lhs, rhs])?;
                            let (left, right) = (&arrays[0], &arrays[1]);

                            // Dispatch on the float width of the first argument.
                            match left.data_type() {
                                DataType::Float64 => {
                                    let computed =
                                        arrow::compute::binary::<_, _, _, Float64Type>(
                                            left.as_primitive::<Float64Type>(),
                                            right.as_primitive::<Float64Type>(),
                                            |a, b| f64::$BINARY_FUNC(a, b),
                                        )?;
                                    Ok(ColumnarValue::Array(
                                        Arc::new(computed) as ArrayRef
                                    ))
                                }
                                DataType::Float32 => {
                                    let computed =
                                        arrow::compute::binary::<_, _, _, Float32Type>(
                                            left.as_primitive::<Float32Type>(),
                                            right.as_primitive::<Float32Type>(),
                                            |a, b| f32::$BINARY_FUNC(a, b),
                                        )?;
                                    Ok(ColumnarValue::Array(
                                        Arc::new(computed) as ArrayRef
                                    ))
                                }
                                other => internal_err!(
                                    "Unsupported data type {other:?} for function {}",
                                    self.name()
                                ),
                            }
                        }
                    }
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
475}