datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates `Expr`-building helper functions for a list of scalar functions.
///
/// The macro expands to one `pub fn` per entry and nothing else: it does not
/// create a module and it does not create a `functions()` list. Because every
/// generated helper obtains its UDF through `super::$FUNC()`, the macro has to
/// be invoked inside a submodule (for example `expr_fn`) whose parent module
/// defines a function named `$FUNC`.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(args)", args,),
///         (pi, "Return pi",),
///         (atan2, "Return atan2(y, x)", y x)
///     );
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(args)
///     pub fn encode(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
///         super::encode().call(args)
///     }
///     /// Return pi
///     pub fn pi() -> datafusion_expr::Expr {
///         super::pi().call(vec![])
///     }
///     /// Return atan2(y, x)
///     pub fn atan2(
///         y: datafusion_expr::Expr,
///         x: datafusion_expr::Expr,
///     ) -> datafusion_expr::Expr {
///         super::atan2().call(vec![y, x])
///     }
/// }
/// ```
///
/// Each entry has the form `(name, doc, args)` where `args` is either:
/// - a single identifier followed by a comma: the helper takes one
///   `Vec<Expr>` parameter with that name
/// - zero or more identifiers without commas: the helper takes one `Expr`
///   parameter per identifier and passes them on as a `Vec`
#[macro_export]
macro_rules! export_functions {
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => {
        $(
            // Hand each entry to one of the `single` arms below. The `$crate::`
            // prefix keeps this recursive call resolvable even when the macro
            // was invoked through a path and is not in textual scope by its
            // bare name.
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
68
/// Defines a function named `$NAME` that returns a shared `ScalarUDF` built
/// from the `$UDF` implementation.
///
/// The `ScalarUDF` lives in a lazily initialized static: `<$UDF>::new()` runs
/// the first time `$NAME()` is called, and every call hands back a clone of
/// the same `Arc`. This ensures that building the UDF only happens once.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // One shared UDF per generated function, built on first access
            static SHARED_UDF: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                let udf = datafusion_expr::ScalarUDF::new_from_impl(<$UDF>::new());
                std::sync::Arc::new(udf)
            });
            // Cloning the `Arc` only bumps the reference count
            std::sync::Arc::clone(&*SHARED_UDF)
        }
    };
}
90
/// Declares a stub module `$name` that is compiled only when the cargo
/// feature `$feature` is NOT enabled.
///
/// The stub contains a single `functions()` that returns an empty list and
/// logs, at debug level, that the package's functions are disabled.
///
/// The intent of providing stubs is to help users configure datafusion
/// properly (so they can find out why a function is not available) instead of
/// getting a cryptic "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions, because the feature that
            /// provides this package is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
113
/// Downcasts `$ARG` to a reference to the concrete array type `$ARRAY_TYPE`.
///
/// On success the macro evaluates to the downcast reference. On failure it
/// builds an internal error naming the argument and the requested type and
/// propagates it with `?`, so it can only be used where `?` is allowed.
/// `internal_datafusion_err!` must be in scope at the call site.
///
/// $ARG: value exposing `as_any()` (e.g. an `ArrayRef`)
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        // Kept as one expression so temporaries created by `$ARG` live as long
        // as the reference produced here.
        (match $ARG.as_any().downcast_ref::<$ARRAY_TYPE>() {
            Some(typed) => Ok(typed),
            None => Err(internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )),
        })?
    }};
}
132
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails.
///
/// This is `downcast_named_arg!` with an empty argument name, so the error
/// message only identifies the requested array type. Like that macro, it
/// propagates the error with `?`.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        // `$crate::` keeps the delegation resolvable when this exported macro
        // is invoked through a path and `downcast_named_arg!` is not in
        // textual scope by its bare name.
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
144
/// Macro to create a unary math UDF.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` and its `ScalarUDFImpl`, plus (via `make_udf_function!`) a function
/// `$NAME()` returning the shared `ScalarUDF`.
///
/// The generated function accepts one argument of type Float64 or Float32,
/// applies `f64::$UNARY_FUNC` / `f32::$UNARY_FUNC` to every element, and
/// returns an array of the same floating point type.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: called with `&[ExprProperties]` to compute the output ordering
/// $EVALUATE_BOUNDS: called with `&[&Interval]` to compute the output bounds
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a one-argument Float64/Float32 signature
                pub fn new() -> Self {
                    let signature = Signature::uniform(
                        1,
                        vec![DataType::Float64, DataType::Float32],
                        Volatility::Immutable,
                    );
                    Self { signature }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input yields Float32; every other type
                    // (possible values float64/null/int) yields Float64
                    Ok(match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    })
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let input = &arrays[0];

                    // Dispatch on the runtime type of the only argument
                    let output: ArrayRef = match input.data_type() {
                        DataType::Float64 => {
                            let values = input.as_primitive::<Float64Type>();
                            Arc::new(
                                values.unary::<_, Float64Type>(|v: f64| {
                                    f64::$UNARY_FUNC(v)
                                }),
                            ) as ArrayRef
                        }
                        DataType::Float32 => {
                            let values = input.as_primitive::<Float32Type>();
                            Arc::new(
                                values.unary::<_, Float32Type>(|v: f32| {
                                    f32::$UNARY_FUNC(v)
                                }),
                            ) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
258
/// Macro to create a binary math UDF.
///
/// The expansion consists of a private module `$NAME` containing the struct
/// `$UDF` and its `ScalarUDFImpl`, plus (via `make_udf_function!`) a function
/// `$NAME()` returning the shared `ScalarUDF`.
///
/// The generated function accepts two arguments that are both Float32 or both
/// Float64, applies `f32::$BINARY_FUNC` / `f64::$BINARY_FUNC` element-wise to
/// the pair, and returns an array of the same floating point type. The type of
/// the first argument selects which variant runs.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function (also the name of the generated module)
/// $BINARY_FUNC: the binary function to apply to the arguments
/// $OUTPUT_ORDERING: called with `&[ExprProperties]` to compute the output ordering
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF accepting (Float32, Float32) or (Float64, Float64)
                pub fn new() -> Self {
                    let accepted = vec![
                        TypeSignature::Exact(vec![DataType::Float32, DataType::Float32]),
                        TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]),
                    ];
                    Self {
                        signature: Signature::one_of(accepted, Volatility::Immutable),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }

                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Float32 input yields Float32; every other type
                    // (possible values float64/null/int) yields Float64
                    Ok(match &arg_types[0] {
                        DataType::Float32 => DataType::Float32,
                        _ => DataType::Float64,
                    })
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
                    let first = &arrays[0];
                    let second = &arrays[1];

                    // The first argument's runtime type picks the variant
                    let output: ArrayRef = match first.data_type() {
                        DataType::Float64 => {
                            let lhs = first.as_primitive::<Float64Type>();
                            let rhs = second.as_primitive::<Float64Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float64Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f64::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        DataType::Float32 => {
                            let lhs = first.as_primitive::<Float32Type>();
                            let rhs = second.as_primitive::<Float32Type>();
                            let computed =
                                arrow::compute::binary::<_, _, _, Float32Type>(
                                    lhs,
                                    rhs,
                                    |a, b| f32::$BINARY_FUNC(a, b),
                                )?;
                            Arc::new(computed) as ArrayRef
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    Ok(ColumnarValue::Array(output))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
379}