datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates thin `Expr`-building wrapper functions for a list of UDFs.
///
/// Every `(name, doc, args...)` entry expands to one public function called
/// `name`, documented with `doc`, which forwards its arguments to the UDF
/// returned by `super::name()`. Because the wrappers call `super::name`, the
/// macro has to be invoked inside a child module (for example `expr_fn`) of
/// the module that defines the UDF accessor functions.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(arg)", args,),
///         (decode, "Return decode(arg)", args,),
///     );
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<Expr>) -> Expr {
///         super::encode().call(args)
///     }
///     /// Return decode(arg)
///     pub fn decode(args: Vec<Expr>) -> Expr {
///         super::decode().call(args)
///     }
/// }
/// ```
///
/// The argument part of an entry selects the shape of the wrapper:
/// - `arg,` (a single identifier followed by a comma): one `Vec<Expr>` parameter
/// - `a b c` (zero or more identifiers, *without* commas): one `Expr` parameter
///   per identifier
/// - `@config`: no parameters; the UDF is obtained by passing
///   `ConfigOptions::default()` to `super::name` and is called with no arguments
///
/// Entries are separated by commas; a trailing comma after the last entry is
/// accepted.
#[macro_export]
macro_rules! export_functions {
    // Entry point: a comma separated list of `(name, doc, args...)` tuples.
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // Dispatch each tuple to one of the `single` arms below.
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // function that requires config (marked with @config)
    (single $FUNC:ident, $DOC:expr, @config) => {
        #[doc = $DOC]
        pub fn $FUNC() -> datafusion_expr::Expr {
            use datafusion_common::config::ConfigOptions;
            super::$FUNC(&ConfigOptions::default()).call(vec![])
        }
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
78
/// Defines a public accessor function `$NAME` for a lazily created, shared
/// `ScalarUDF` wrapping the `$UDF` implementation.
///
/// `<$UDF>::new()` is run at most once, the first time `$NAME()` is called;
/// every call hands back a clone of the same `Arc`, so the UDF itself is never
/// rebuilt.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // One instance per generated function, initialised on first use.
            static INSTANCE: std::sync::LazyLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
                std::sync::LazyLock::new(|| {
                    let udf = datafusion_expr::ScalarUDF::new_from_impl(<$UDF>::new());
                    std::sync::Arc::new(udf)
                });

            // Cloning the `Arc` only bumps the reference count.
            std::sync::Arc::clone(std::sync::LazyLock::force(&INSTANCE))
        }
    };
}
101
/// Defines a public function `$NAME` that builds a `ScalarUDF` from the `$UDF`
/// implementation using the supplied `ConfigOptions`.
///
/// Unlike `make_udf_function`, nothing is cached here: every call runs
/// `<$UDF>::new_with_config(config)` and returns a newly allocated `Arc`, so
/// the result always reflects the configuration passed to that call.
///
/// `$UDF` must provide `new_with_config`; it is called with the
/// `&ConfigOptions` reference exactly as received.
#[macro_export]
macro_rules! make_udf_function_with_config {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME(
            config: &datafusion_common::config::ConfigOptions,
        ) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // `config` is already a reference; pass it through without re-borrowing.
            std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                <$UDF>::new_with_config(config),
            ))
        }
    };
}
117
/// Emits a placeholder module named `$name` that is compiled only when the
/// cargo feature `$feature` is *not* enabled.
///
/// The placeholder exposes a `functions()` that returns no UDFs and records a
/// debug-level log line saying the package is disabled. Providing the stub
/// helps users configure datafusion properly: the module docs point at the
/// feature flag to enable, instead of the package silently being absent and
/// leading to a cryptic "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
140
/// Downcast a named argument to a specific array type, returning an internal
/// error if the cast fails.
///
/// The expansion ends in `?`, so the macro can only be used inside a function
/// whose error type can be built from the error produced by
/// `datafusion_common::internal_datafusion_err!`. On success it evaluates to a
/// shared reference to the concrete `$ARRAY_TYPE`.
///
/// $ARG: value exposing `as_any()` (e.g. an `ArrayRef`)
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to; any type is
///   accepted, including generic or path-qualified ones such as
///   `PrimitiveArray<Int32Type>`
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ty) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            datafusion_common::internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
159
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails.
///
/// This is a convenience wrapper around `downcast_named_arg!` for callers that
/// have no user-facing argument name: the source text of `$ARG` is used as the
/// name, so a failure reads e.g. `could not cast args[0] to ...` rather than
/// leaving the name empty. Like `downcast_named_arg!`, the expansion ends in
/// `?` and therefore must be used inside a function returning a compatible
/// `Result`.
///
/// $ARG: value exposing `as_any()` (e.g. an `ArrayRef`)
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        $crate::downcast_named_arg!($ARG, stringify!($ARG), $ARRAY_TYPE)
    }};
}
171
/// Generates a single-argument floating point math UDF.
///
/// The expansion consists of a private module `$NAME` holding the struct
/// `$UDF` together with its `ScalarUDFImpl`, plus (via `make_udf_function!`)
/// an accessor function `$NAME()` returning the shared `ScalarUDF`.
///
/// The generated function accepts one `Float64` or `Float32` argument and
/// applies `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC` to every element. The
/// reported return type is `Float32` for a `Float32` argument and `Float64`
/// for anything else.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $UNARY_FUNC: the associated function of `f64`/`f32` applied to each value
/// $OUTPUT_ORDERING: callable given the input `&[ExprProperties]`, producing
///   the `Result<SortProperties>` returned from `output_ordering`
/// $EVALUATE_BOUNDS: callable given the input `&[&Interval]`, producing the
///   `Result<Interval>` returned from `evaluate_bounds`
/// $GET_DOC: callable returning the `&Documentation` of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                pub fn new() -> Self {
                    // Exactly one argument, which must be one of these types.
                    let accepted = vec![DataType::Float64, DataType::Float32];
                    let signature =
                        Signature::uniform(1, accepted, Volatility::Immutable);
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; every other input type
                    // (float64/null/int) is reported as Float64.
                    let output_type = if matches!(arg_types[0], DataType::Float32) {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    let output: ArrayRef = match input.data_type() {
                        DataType::Float64 => {
                            let computed = input
                                .as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|v: f64| f64::$UNARY_FUNC(v));
                            Arc::new(computed) as ArrayRef
                        }
                        DataType::Float32 => {
                            let computed = input
                                .as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|v: f32| f32::$UNARY_FUNC(v));
                            Arc::new(computed) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
285
/// Generates a two-argument floating point math UDF.
///
/// The expansion consists of a private module `$NAME` holding the struct
/// `$UDF` together with its `ScalarUDFImpl`, plus (via `make_udf_function!`)
/// an accessor function `$NAME()` returning the shared `ScalarUDF`.
///
/// The generated function accepts either two `Float32` or two `Float64`
/// arguments and combines them element-wise with `f32::$BINARY_FUNC` /
/// `f64::$BINARY_FUNC`, passing the first argument's value first. The reported
/// return type is `Float32` when the first argument is `Float32` and `Float64`
/// otherwise.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $BINARY_FUNC: the associated function of `f64`/`f32` applied to each pair
/// $OUTPUT_ORDERING: callable given the input `&[ExprProperties]`, producing
///   the `Result<SortProperties>` returned from `output_ordering`
/// $GET_DOC: callable returning the `&Documentation` of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        $crate::make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                pub fn new() -> Self {
                    // Both arguments must share the same float width.
                    let same_width_pairs = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    let signature =
                        Signature::one_of(same_width_pairs, Volatility::Immutable);
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 stays Float32; every other first-argument type
                    // (float64/null/int) is reported as Float64.
                    let output_type = if matches!(arg_types[0], DataType::Float32) {
                        DataType::Float32
                    } else {
                        DataType::Float64
                    };
                    Ok(output_type)
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;

                    // The first argument's type selects the kernel; the second
                    // argument is only touched inside the float arms.
                    let output: ArrayRef = match arrays[0].data_type() {
                        DataType::Float64 => {
                            let first = arrays[0].as_primitive::<Float64Type>();
                            let second = arrays[1].as_primitive::<Float64Type>();
                            let combined =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    first,
                                    second,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(combined) as ArrayRef
                        }
                        DataType::Float32 => {
                            let first = arrays[0].as_primitive::<Float32Type>();
                            let second = arrays[1].as_primitive::<Float32Type>();
                            let combined =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    first,
                                    second,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(combined) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}