datafusion_functions/macros.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// Generates one `Expr`-building function per listed `ScalarUDF` accessor.
///
/// Each entry is a parenthesised tuple `(name, doc, args...)`. For every entry
/// the macro emits a public function called `name`, documented with `doc`,
/// that forwards its arguments to `super::name().call(..)`. Because of the
/// `super::` path the macro has to be invoked inside a child module
/// (conventionally `expr_fn`) of the module that defines the `name()`
/// accessors, and `datafusion_expr` must be nameable at the call site.
///
/// For example
/// ```text
/// pub mod expr_fn {
///     export_functions!(
///         (encode, "Return encode(arg)", args,),
///         (atan2, "Return atan2(y, x)", y x),
///     );
/// }
/// ```
/// is equivalent to
/// ```text
/// pub mod expr_fn {
///     /// Return encode(arg)
///     pub fn encode(args: Vec<Expr>) -> Expr {
///         super::encode().call(args)
///     }
///     /// Return atan2(y, x)
///     pub fn atan2(y: Expr, x: Expr) -> Expr {
///         super::atan2().call(vec![y, x])
///     }
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
///
/// A trailing comma after the last tuple is accepted.
#[macro_export]
macro_rules! export_functions {
    // entry point: comma-separated `(name, doc, args...)` tuples
    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),* $(,)?) => {
        $(
            // switch to single-function cases below; macro names are looked up
            // at the invocation site, so `$crate::` is needed for the recursive
            // call to resolve when this macro is invoked by path
            $crate::export_functions!(single $FUNC, $DOC, $($arg)*);
        )*
    };

    // single vector argument (a single argument followed by a comma)
    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
        #[doc = $DOC]
        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
            super::$FUNC().call($arg)
        }
    };

    // variadic arguments (zero or more arguments, without commas)
    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
        #[doc = $DOC]
        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
            super::$FUNC().call(vec![$($arg),*])
        }
    };
}
68
/// Defines a public function `$NAME()` that hands out a shared `ScalarUDF`
/// wrapping the `$UDF` implementation.
///
/// The `ScalarUDF` is built from `<$UDF>::new()` on first use and kept in a
/// function-local static, so repeated calls return clones of the same `Arc`
/// instead of constructing the function again.
#[macro_export]
macro_rules! make_udf_function {
    ($UDF:ty, $NAME:ident) => {
        #[allow(rustdoc::redundant_explicit_links)]
        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            // Initialised lazily on first access, then shared by every caller
            static SINGLETON: std::sync::LazyLock<
                std::sync::Arc<datafusion_expr::ScalarUDF>,
            > = std::sync::LazyLock::new(|| {
                let udf = datafusion_expr::ScalarUDF::new_from_impl(<$UDF>::new());
                std::sync::Arc::new(udf)
            });
            // Only the reference count is bumped; the UDF itself is not copied
            std::sync::Arc::clone(&*SINGLETON)
        }
    };
}
91
/// Emits a placeholder module `$name` that is compiled only when the cargo
/// feature `$feature` is disabled.
///
/// The placeholder exposes a `functions()` that logs a debug message and
/// returns an empty list. Providing the stub lets users see why a function is
/// not available (the feature is off) instead of getting a cryptic
/// "no function found" message at runtime.
macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
                log::debug!("{} functions disabled", stringify!($name));
                Vec::new()
            }
        }
    };
}
114
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
///
/// The expansion is an expression ending in `?`: on success it evaluates to a
/// reference to the downcast array, on failure it returns early from the
/// enclosing function or closure. `internal_datafusion_err!` is invoked
/// unqualified, so it must be in scope wherever this macro is used.
#[macro_export]
macro_rules! downcast_named_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        // `downcast_ref` yields `None` when `$ARG` is not a `$ARRAY_TYPE`; that
        // case becomes an error naming the argument and the requested type
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            internal_datafusion_err!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            )
        })?
    }};
}
133
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// Shorthand for `downcast_named_arg!` with an empty argument name, so the
/// error message only mentions the target type.
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
    ($ARG:expr, $ARRAY_TYPE:ident) => {{
        // Macro names are looked up at the invocation site; `$crate::` keeps
        // this call resolvable when only `downcast_arg!` has been imported
        $crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
    }};
}
145
/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// The macro generates a private module `$NAME` containing the struct `$UDF`
/// and, through `make_udf_function!`, an accessor `$NAME()` returning the
/// shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument; invoked as both
///   `f64::$UNARY_FUNC` and `f32::$UNARY_FUNC`
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $EVALUATE_BOUNDS: the function computing the output interval from the input intervals
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::interval_arithmetic::Interval;
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a signature of exactly one Float64 or
                /// Float32 argument and immutable volatility.
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::uniform(
                            1,
                            vec![Float64, Float32],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Checked access: report an error instead of panicking if
                    // the function is ever asked for its type without arguments
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        None => exec_err!(
                            "Function {} expects exactly 1 argument, got 0",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
                    $EVALUATE_BOUNDS(inputs)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let args = ColumnarValue::values_to_arrays(&args.args)?;
                    // Destructure instead of indexing so that a wrong argument
                    // count becomes an error rather than a panic
                    let [arg] = args.as_slice() else {
                        return exec_err!(
                            "Function {} expects exactly 1 argument, got {}",
                            self.name(),
                            args.len()
                        );
                    };
                    let arr: ArrayRef = match arg.data_type() {
                        DataType::Float64 => Arc::new(
                            arg.as_primitive::<Float64Type>()
                                .unary::<_, Float64Type>(|x: f64| f64::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        DataType::Float32 => Arc::new(
                            arg.as_primitive::<Float32Type>()
                                .unary::<_, Float32Type>(|x: f32| f32::$UNARY_FUNC(x)),
                        ) as ArrayRef,
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    // The result is always returned as an array
                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}
259
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments that are both Float32 or both
/// Float64, applies a binary floating function to them, and returns a value
/// of the same type.
///
/// The macro generates a private module `$NAME` containing the struct `$UDF`
/// and, through `make_udf_function!`, an accessor `$NAME()` returning the
/// shared `ScalarUDF`.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the arguments; invoked as
///   both `f64::$BINARY_FUNC(first, second)` and `f32::$BINARY_FUNC(first, second)`
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
    ($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
        make_udf_function!($NAME::$UDF, $NAME);

        mod $NAME {
            use std::any::Any;
            use std::sync::Arc;

            use arrow::array::{ArrayRef, AsArray};
            use arrow::datatypes::{DataType, Float32Type, Float64Type};
            use datafusion_common::{exec_err, Result};
            use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
            use datafusion_expr::TypeSignature;
            use datafusion_expr::{
                ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
                Signature, Volatility,
            };

            #[derive(Debug, PartialEq, Eq, Hash)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                /// Creates the UDF with a signature accepting either two
                /// Float32 or two Float64 arguments and immutable volatility.
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::one_of(
                            vec![
                                TypeSignature::Exact(vec![Float32, Float32]),
                                TypeSignature::Exact(vec![Float64, Float64]),
                            ],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    // Checked access: report an error instead of panicking if
                    // the function is ever asked for its type without arguments
                    match arg_types.first() {
                        Some(DataType::Float32) => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        Some(_) => Ok(DataType::Float64),
                        None => exec_err!(
                            "Function {} expects exactly 2 arguments, got 0",
                            self.name()
                        ),
                    }
                }

                fn output_ordering(
                    &self,
                    input: &[ExprProperties],
                ) -> Result<SortProperties> {
                    $OUTPUT_ORDERING(input)
                }

                fn invoke_with_args(
                    &self,
                    args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    let args = ColumnarValue::values_to_arrays(&args.args)?;
                    // Destructure instead of indexing so that a wrong argument
                    // count becomes an error rather than a panic
                    let [first, second] = args.as_slice() else {
                        return exec_err!(
                            "Function {} expects exactly 2 arguments, got {}",
                            self.name(),
                            args.len()
                        );
                    };
                    // Both argument types are checked before downcasting:
                    // `as_primitive` panics when the array is of another type
                    let arr: ArrayRef = match (first.data_type(), second.data_type()) {
                        (DataType::Float64, DataType::Float64) => {
                            let y = first.as_primitive::<Float64Type>();
                            let x = second.as_primitive::<Float64Type>();
                            let result = arrow::compute::binary::<_, _, _, Float64Type>(
                                y,
                                x,
                                |y, x| f64::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        (DataType::Float32, DataType::Float32) => {
                            let y = first.as_primitive::<Float32Type>();
                            let x = second.as_primitive::<Float32Type>();
                            let result = arrow::compute::binary::<_, _, _, Float32Type>(
                                y,
                                x,
                                |y, x| f32::$BINARY_FUNC(y, x),
                            )?;
                            Arc::new(result) as _
                        }
                        // `other` is the second type when the first one is a
                        // supported float, otherwise it is the first type
                        (DataType::Float64 | DataType::Float32, other) | (other, _) => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };

                    // The result is always returned as an array
                    Ok(ColumnarValue::Array(arr))
                }

                fn documentation(&self) -> Option<&Documentation> {
                    Some($GET_DOC())
                }
            }
        }
    };
}