datafusion_expr/udf.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDF`]: Scalar User Defined Functions
19
20use crate::expr::schema_name_from_exprs_comma_separated_without_space;
21use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
22use crate::sort_properties::{ExprProperties, SortProperties};
23use crate::{ColumnarValue, Documentation, Expr, Signature};
24use arrow::datatypes::DataType;
25use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
26use datafusion_expr_common::interval_arithmetic::Interval;
27use std::any::Any;
28use std::cmp::Ordering;
29use std::fmt::Debug;
30use std::hash::{DefaultHasher, Hash, Hasher};
31use std::sync::Arc;
32
/// Logical representation of a Scalar User Defined Function.
///
/// A scalar function produces a single row output for each row of input. This
/// struct contains the information DataFusion needs to plan and invoke
/// functions you supply, such as name, type signature, return type, and actual
/// implementation.
///
/// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]).
///
/// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API
///    access (examples in [`advanced_udf.rs`]).
///
/// See [`Self::call`] to invoke a `ScalarUDF` with arguments.
///
/// # API Note
///
/// This is a separate struct from `ScalarUDFImpl` to maintain backwards
/// compatibility with the older API.
///
/// [`create_udf`]: crate::expr_fn::create_udf
/// [`simple_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
#[derive(Debug, Clone)]
pub struct ScalarUDF {
    /// Shared implementation object; the accessor methods on this struct
    /// forward to it.
    inner: Arc<dyn ScalarUDFImpl>,
}
59
60impl PartialEq for ScalarUDF {
61 fn eq(&self, other: &Self) -> bool {
62 self.inner.equals(other.inner.as_ref())
63 }
64}
65
66// Manual implementation based on `ScalarUDFImpl::equals`
67impl PartialOrd for ScalarUDF {
68 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
69 match self.name().partial_cmp(other.name()) {
70 Some(Ordering::Equal) => self.signature().partial_cmp(other.signature()),
71 cmp => cmp,
72 }
73 }
74}
75
// `Eq` is only a marker here: it relies on `ScalarUDFImpl::equals` following
// the `Eq` rules (reflexive, symmetric, transitive), as that method's
// documentation requires of implementors.
impl Eq for ScalarUDF {}
77
78impl Hash for ScalarUDF {
79 fn hash<H: Hasher>(&self, state: &mut H) {
80 self.inner.hash_value().hash(state)
81 }
82}
83
84impl ScalarUDF {
85 /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
86 ///
87 /// Note this is the same as using the `From` impl (`ScalarUDF::from`)
88 pub fn new_from_impl<F>(fun: F) -> ScalarUDF
89 where
90 F: ScalarUDFImpl + 'static,
91 {
92 Self::new_from_shared_impl(Arc::new(fun))
93 }
94
95 /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
96 pub fn new_from_shared_impl(fun: Arc<dyn ScalarUDFImpl>) -> ScalarUDF {
97 Self { inner: fun }
98 }
99
100 /// Return the underlying [`ScalarUDFImpl`] trait object for this function
101 pub fn inner(&self) -> &Arc<dyn ScalarUDFImpl> {
102 &self.inner
103 }
104
105 /// Adds additional names that can be used to invoke this function, in
106 /// addition to `name`
107 ///
108 /// If you implement [`ScalarUDFImpl`] directly you should return aliases directly.
109 pub fn with_aliases(self, aliases: impl IntoIterator<Item = &'static str>) -> Self {
110 Self::new_from_impl(AliasedScalarUDFImpl::new(Arc::clone(&self.inner), aliases))
111 }
112
113 /// Returns a [`Expr`] logical expression to call this UDF with specified
114 /// arguments.
115 ///
116 /// This utility allows easily calling UDFs
117 ///
118 /// # Example
119 /// ```no_run
120 /// use datafusion_expr::{col, lit, ScalarUDF};
121 /// # fn my_udf() -> ScalarUDF { unimplemented!() }
122 /// let my_func: ScalarUDF = my_udf();
123 /// // Create an expr for `my_func(a, 12.3)`
124 /// let expr = my_func.call(vec![col("a"), lit(12.3)]);
125 /// ```
126 pub fn call(&self, args: Vec<Expr>) -> Expr {
127 Expr::ScalarFunction(crate::expr::ScalarFunction::new_udf(
128 Arc::new(self.clone()),
129 args,
130 ))
131 }
132
133 /// Returns this function's name.
134 ///
135 /// See [`ScalarUDFImpl::name`] for more details.
136 pub fn name(&self) -> &str {
137 self.inner.name()
138 }
139
140 /// Returns this function's display_name.
141 ///
142 /// See [`ScalarUDFImpl::display_name`] for more details
143 pub fn display_name(&self, args: &[Expr]) -> Result<String> {
144 self.inner.display_name(args)
145 }
146
147 /// Returns this function's schema_name.
148 ///
149 /// See [`ScalarUDFImpl::schema_name`] for more details
150 pub fn schema_name(&self, args: &[Expr]) -> Result<String> {
151 self.inner.schema_name(args)
152 }
153
154 /// Returns the aliases for this function.
155 ///
156 /// See [`ScalarUDF::with_aliases`] for more details
157 pub fn aliases(&self) -> &[String] {
158 self.inner.aliases()
159 }
160
161 /// Returns this function's [`Signature`] (what input types are accepted).
162 ///
163 /// See [`ScalarUDFImpl::signature`] for more details.
164 pub fn signature(&self) -> &Signature {
165 self.inner.signature()
166 }
167
168 /// The datatype this function returns given the input argument types.
169 /// This function is used when the input arguments are [`DataType`]s.
170 ///
171 /// # Notes
172 ///
173 /// If a function implement [`ScalarUDFImpl::return_type_from_args`],
174 /// its [`ScalarUDFImpl::return_type`] should raise an error.
175 ///
176 /// See [`ScalarUDFImpl::return_type`] for more details.
177 pub fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
178 self.inner.return_type(arg_types)
179 }
180
181 /// Return the datatype this function returns given the input argument types.
182 ///
183 /// See [`ScalarUDFImpl::return_type_from_args`] for more details.
184 pub fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
185 self.inner.return_type_from_args(args)
186 }
187
188 /// Do the function rewrite
189 ///
190 /// See [`ScalarUDFImpl::simplify`] for more details.
191 pub fn simplify(
192 &self,
193 args: Vec<Expr>,
194 info: &dyn SimplifyInfo,
195 ) -> Result<ExprSimplifyResult> {
196 self.inner.simplify(args, info)
197 }
198
199 #[allow(deprecated)]
200 pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
201 self.inner.is_nullable(args, schema)
202 }
203
204 /// Invoke the function on `args`, returning the appropriate result.
205 ///
206 /// See [`ScalarUDFImpl::invoke_with_args`] for details.
207 pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
208 self.inner.invoke_with_args(args)
209 }
210
211 /// Get the circuits of inner implementation
212 pub fn short_circuits(&self) -> bool {
213 self.inner.short_circuits()
214 }
215
216 /// Computes the output interval for a [`ScalarUDF`], given the input
217 /// intervals.
218 ///
219 /// # Parameters
220 ///
221 /// * `inputs` are the intervals for the inputs (children) of this function.
222 ///
223 /// # Example
224 ///
225 /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
226 /// then the output interval would be `[0, 3]`.
227 pub fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
228 self.inner.evaluate_bounds(inputs)
229 }
230
231 /// Updates bounds for child expressions, given a known interval for this
232 /// function. This is used to propagate constraints down through an expression
233 /// tree.
234 ///
235 /// # Parameters
236 ///
237 /// * `interval` is the currently known interval for this function.
238 /// * `inputs` are the current intervals for the inputs (children) of this function.
239 ///
240 /// # Returns
241 ///
242 /// A `Vec` of new intervals for the children, in order.
243 ///
244 /// If constraint propagation reveals an infeasibility for any child, returns
245 /// [`None`]. If none of the children intervals change as a result of
246 /// propagation, may return an empty vector instead of cloning `children`.
247 /// This is the default (and conservative) return value.
248 ///
249 /// # Example
250 ///
251 /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
252 /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
253 pub fn propagate_constraints(
254 &self,
255 interval: &Interval,
256 inputs: &[&Interval],
257 ) -> Result<Option<Vec<Interval>>> {
258 self.inner.propagate_constraints(interval, inputs)
259 }
260
261 /// Calculates the [`SortProperties`] of this function based on its
262 /// children's properties.
263 pub fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
264 self.inner.output_ordering(inputs)
265 }
266
267 pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
268 self.inner.preserves_lex_ordering(inputs)
269 }
270
271 /// See [`ScalarUDFImpl::coerce_types`] for more details.
272 pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
273 self.inner.coerce_types(arg_types)
274 }
275
276 /// Returns the documentation for this Scalar UDF.
277 ///
278 /// Documentation can be accessed programmatically as well as
279 /// generating publicly facing documentation.
280 pub fn documentation(&self) -> Option<&Documentation> {
281 self.inner.documentation()
282 }
283}
284
285impl<F> From<F> for ScalarUDF
286where
287 F: ScalarUDFImpl + 'static,
288{
289 fn from(fun: F) -> Self {
290 Self::new_from_impl(fun)
291 }
292}
293
294/// Arguments passed to [`ScalarUDFImpl::invoke_with_args`] when invoking a
295/// scalar function.
296pub struct ScalarFunctionArgs<'a> {
297 /// The evaluated arguments to the function
298 pub args: Vec<ColumnarValue>,
299 /// The number of rows in record batch being evaluated
300 pub number_rows: usize,
301 /// The return type of the scalar function returned (from `return_type` or `return_type_from_args`)
302 /// when creating the physical expression from the logical expression
303 pub return_type: &'a DataType,
304}
305
/// Information about arguments passed to the function
///
/// This structure contains metadata about how the function was called
/// such as the type of the arguments, any scalar arguments and if the
/// arguments can (ever) be null
///
/// See [`ScalarUDFImpl::return_type_from_args`] for more information
#[derive(Debug)]
pub struct ReturnTypeArgs<'a> {
    /// The data types of the arguments to the function
    pub arg_types: &'a [DataType],
    /// Is argument `i` to the function a scalar (constant)
    ///
    /// If argument `i` is not a scalar, it will be None
    ///
    /// For example, if a function is called like `my_function(column_a, 5)`
    /// this field will be `[None, Some(ScalarValue::Int32(Some(5)))]`
    pub scalar_arguments: &'a [Option<&'a ScalarValue>],
    /// Can argument `i` (ever) be null?
    pub nullables: &'a [bool],
}
327
328/// Return metadata for this function.
329///
330/// See [`ScalarUDFImpl::return_type_from_args`] for more information
331#[derive(Debug)]
332pub struct ReturnInfo {
333 return_type: DataType,
334 nullable: bool,
335}
336
337impl ReturnInfo {
338 pub fn new(return_type: DataType, nullable: bool) -> Self {
339 Self {
340 return_type,
341 nullable,
342 }
343 }
344
345 pub fn new_nullable(return_type: DataType) -> Self {
346 Self {
347 return_type,
348 nullable: true,
349 }
350 }
351
352 pub fn new_non_nullable(return_type: DataType) -> Self {
353 Self {
354 return_type,
355 nullable: false,
356 }
357 }
358
359 pub fn return_type(&self) -> &DataType {
360 &self.return_type
361 }
362
363 pub fn nullable(&self) -> bool {
364 self.nullable
365 }
366
367 pub fn into_parts(self) -> (DataType, bool) {
368 (self.return_type, self.nullable)
369 }
370}
371
372/// Trait for implementing user defined scalar functions.
373///
374/// This trait exposes the full API for implementing user defined functions and
375/// can be used to implement any function.
376///
377/// See [`advanced_udf.rs`] for a full example with complete implementation and
378/// [`ScalarUDF`] for other available options.
379///
380/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
381///
382/// # Basic Example
383/// ```
384/// # use std::any::Any;
385/// # use std::sync::LazyLock;
386/// # use arrow::datatypes::DataType;
387/// # use datafusion_common::{DataFusionError, plan_err, Result};
388/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility};
389/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
390/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
391/// /// This struct for a simple UDF that adds one to an int32
392/// #[derive(Debug)]
393/// struct AddOne {
394/// signature: Signature,
395/// }
396///
397/// impl AddOne {
398/// fn new() -> Self {
399/// Self {
400/// signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable),
401/// }
402/// }
403/// }
404///
405/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
406/// Documentation::builder(DOC_SECTION_MATH, "Add one to an int32", "add_one(2)")
407/// .with_argument("arg1", "The int32 number to add one to")
408/// .build()
409/// });
410///
411/// fn get_doc() -> &'static Documentation {
412/// &DOCUMENTATION
413/// }
414///
415/// /// Implement the ScalarUDFImpl trait for AddOne
416/// impl ScalarUDFImpl for AddOne {
417/// fn as_any(&self) -> &dyn Any { self }
418/// fn name(&self) -> &str { "add_one" }
419/// fn signature(&self) -> &Signature { &self.signature }
420/// fn return_type(&self, args: &[DataType]) -> Result<DataType> {
421/// if !matches!(args.get(0), Some(&DataType::Int32)) {
422/// return plan_err!("add_one only accepts Int32 arguments");
423/// }
424/// Ok(DataType::Int32)
425/// }
426/// // The actual implementation would add one to the argument
427/// fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
428/// unimplemented!()
429/// }
430/// fn documentation(&self) -> Option<&Documentation> {
431/// Some(get_doc())
432/// }
433/// }
434///
435/// // Create a new ScalarUDF from the implementation
436/// let add_one = ScalarUDF::from(AddOne::new());
437///
438/// // Call the function `add_one(col)`
439/// let expr = add_one.call(vec![col("a")]);
440/// ```
441pub trait ScalarUDFImpl: Debug + Send + Sync {
442 // Note: When adding any methods (with default implementations), remember to add them also
443 // into the AliasedScalarUDFImpl below!
444
445 /// Returns this object as an [`Any`] trait object
446 fn as_any(&self) -> &dyn Any;
447
448 /// Returns this function's name
449 fn name(&self) -> &str;
450
451 /// Returns the user-defined display name of function, given the arguments
452 ///
453 /// This can be used to customize the output column name generated by this
454 /// function.
455 ///
456 /// Defaults to `name(args[0], args[1], ...)`
457 fn display_name(&self, args: &[Expr]) -> Result<String> {
458 let names: Vec<String> = args.iter().map(ToString::to_string).collect();
459 // TODO: join with ", " to standardize the formatting of Vec<Expr>, <https://github.com/apache/datafusion/issues/10364>
460 Ok(format!("{}({})", self.name(), names.join(",")))
461 }
462
463 /// Returns the name of the column this expression would create
464 ///
465 /// See [`Expr::schema_name`] for details
466 fn schema_name(&self, args: &[Expr]) -> Result<String> {
467 Ok(format!(
468 "{}({})",
469 self.name(),
470 schema_name_from_exprs_comma_separated_without_space(args)?
471 ))
472 }
473
474 /// Returns the function's [`Signature`] for information about what input
475 /// types are accepted and the function's Volatility.
476 fn signature(&self) -> &Signature;
477
478 /// What [`DataType`] will be returned by this function, given the types of
479 /// the arguments.
480 ///
481 /// # Notes
482 ///
483 /// If you provide an implementation for [`Self::return_type_from_args`],
484 /// DataFusion will not call `return_type` (this function). In such cases
485 /// is recommended to return [`DataFusionError::Internal`].
486 ///
487 /// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal
488 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType>;
489
490 /// What type will be returned by this function, given the arguments?
491 ///
492 /// By default, this function calls [`Self::return_type`] with the
493 /// types of each argument.
494 ///
495 /// # Notes
496 ///
497 /// Most UDFs should implement [`Self::return_type`] and not this
498 /// function as the output type for most functions only depends on the types
499 /// of their inputs (e.g. `sqrt(f32)` is always `f32`).
500 ///
501 /// This function can be used for more advanced cases such as:
502 ///
503 /// 1. specifying nullability
504 /// 2. return types based on the **values** of the arguments (rather than
505 /// their **types**.
506 ///
507 /// # Output Type based on Values
508 ///
509 /// For example, the following two function calls get the same argument
510 /// types (something and a `Utf8` string) but return different types based
511 /// on the value of the second argument:
512 ///
513 /// * `arrow_cast(x, 'Int16')` --> `Int16`
514 /// * `arrow_cast(x, 'Float32')` --> `Float32`
515 ///
516 /// # Requirements
517 ///
518 /// This function **must** consistently return the same type for the same
519 /// logical input even if the input is simplified (e.g. it must return the same
520 /// value for `('foo' | 'bar')` as it does for ('foobar').
521 fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
522 let return_type = self.return_type(args.arg_types)?;
523 Ok(ReturnInfo::new_nullable(return_type))
524 }
525
526 #[deprecated(
527 since = "45.0.0",
528 note = "Use `return_type_from_args` instead. if you use `is_nullable` that returns non-nullable with `return_type`, you would need to switch to `return_type_from_args`, you might have error"
529 )]
530 fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
531 true
532 }
533
534 /// Invoke the function returning the appropriate result.
535 ///
536 /// # Performance
537 ///
538 /// For the best performance, the implementations should handle the common case
539 /// when one or more of their arguments are constant values (aka
540 /// [`ColumnarValue::Scalar`]).
541 ///
542 /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
543 /// to arrays, which will likely be simpler code, but be slower.
544 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue>;
545
546 /// Returns any aliases (alternate names) for this function.
547 ///
548 /// Aliases can be used to invoke the same function using different names.
549 /// For example in some databases `now()` and `current_timestamp()` are
550 /// aliases for the same function. This behavior can be obtained by
551 /// returning `current_timestamp` as an alias for the `now` function.
552 ///
553 /// Note: `aliases` should only include names other than [`Self::name`].
554 /// Defaults to `[]` (no aliases)
555 fn aliases(&self) -> &[String] {
556 &[]
557 }
558
559 /// Optionally apply per-UDF simplification / rewrite rules.
560 ///
561 /// This can be used to apply function specific simplification rules during
562 /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
563 /// implementation does nothing.
564 ///
565 /// Note that DataFusion handles simplifying arguments and "constant
566 /// folding" (replacing a function call with constant arguments such as
567 /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
568 /// optimizations manually for specific UDFs.
569 ///
570 /// # Arguments
571 /// * `args`: The arguments of the function
572 /// * `info`: The necessary information for simplification
573 ///
574 /// # Returns
575 /// [`ExprSimplifyResult`] indicating the result of the simplification NOTE
576 /// if the function cannot be simplified, the arguments *MUST* be returned
577 /// unmodified
578 fn simplify(
579 &self,
580 args: Vec<Expr>,
581 _info: &dyn SimplifyInfo,
582 ) -> Result<ExprSimplifyResult> {
583 Ok(ExprSimplifyResult::Original(args))
584 }
585
586 /// Returns true if some of this `exprs` subexpressions may not be evaluated
587 /// and thus any side effects (like divide by zero) may not be encountered
588 /// Setting this to true prevents certain optimizations such as common subexpression elimination
589 fn short_circuits(&self) -> bool {
590 false
591 }
592
593 /// Computes the output interval for a [`ScalarUDFImpl`], given the input
594 /// intervals.
595 ///
596 /// # Parameters
597 ///
598 /// * `children` are the intervals for the children (inputs) of this function.
599 ///
600 /// # Example
601 ///
602 /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
603 /// then the output interval would be `[0, 3]`.
604 fn evaluate_bounds(&self, _input: &[&Interval]) -> Result<Interval> {
605 // We cannot assume the input datatype is the same of output type.
606 Interval::make_unbounded(&DataType::Null)
607 }
608
609 /// Updates bounds for child expressions, given a known interval for this
610 /// function. This is used to propagate constraints down through an expression
611 /// tree.
612 ///
613 /// # Parameters
614 ///
615 /// * `interval` is the currently known interval for this function.
616 /// * `inputs` are the current intervals for the inputs (children) of this function.
617 ///
618 /// # Returns
619 ///
620 /// A `Vec` of new intervals for the children, in order.
621 ///
622 /// If constraint propagation reveals an infeasibility for any child, returns
623 /// [`None`]. If none of the children intervals change as a result of
624 /// propagation, may return an empty vector instead of cloning `children`.
625 /// This is the default (and conservative) return value.
626 ///
627 /// # Example
628 ///
629 /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
630 /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
631 fn propagate_constraints(
632 &self,
633 _interval: &Interval,
634 _inputs: &[&Interval],
635 ) -> Result<Option<Vec<Interval>>> {
636 Ok(Some(vec![]))
637 }
638
639 /// Calculates the [`SortProperties`] of this function based on its children's properties.
640 fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
641 if !self.preserves_lex_ordering(inputs)? {
642 return Ok(SortProperties::Unordered);
643 }
644
645 let Some(first_order) = inputs.first().map(|p| &p.sort_properties) else {
646 return Ok(SortProperties::Singleton);
647 };
648
649 if inputs
650 .iter()
651 .skip(1)
652 .all(|input| &input.sort_properties == first_order)
653 {
654 Ok(*first_order)
655 } else {
656 Ok(SortProperties::Unordered)
657 }
658 }
659
660 /// Whether the function preserves lexicographical ordering based on the input ordering
661 fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
662 Ok(false)
663 }
664
665 /// Coerce arguments of a function call to types that the function can evaluate.
666 ///
667 /// This function is only called if [`ScalarUDFImpl::signature`] returns [`crate::TypeSignature::UserDefined`]. Most
668 /// UDFs should return one of the other variants of `TypeSignature` which handle common
669 /// cases
670 ///
671 /// See the [type coercion module](crate::type_coercion)
672 /// documentation for more details on type coercion
673 ///
674 /// For example, if your function requires a floating point arguments, but the user calls
675 /// it like `my_func(1::int)` (i.e. with `1` as an integer), coerce_types can return `[DataType::Float64]`
676 /// to ensure the argument is converted to `1::double`
677 ///
678 /// # Parameters
679 /// * `arg_types`: The argument types of the arguments this function with
680 ///
681 /// # Return value
682 /// A Vec the same length as `arg_types`. DataFusion will `CAST` the function call
683 /// arguments to these specific types.
684 fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
685 not_impl_err!("Function {} does not implement coerce_types", self.name())
686 }
687
688 /// Return true if this scalar UDF is equal to the other.
689 ///
690 /// Allows customizing the equality of scalar UDFs.
691 /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]:
692 ///
693 /// - reflexive: `a.equals(a)`;
694 /// - symmetric: `a.equals(b)` implies `b.equals(a)`;
695 /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`.
696 ///
697 /// By default, compares [`Self::name`] and [`Self::signature`].
698 fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
699 self.name() == other.name() && self.signature() == other.signature()
700 }
701
702 /// Returns a hash value for this scalar UDF.
703 ///
704 /// Allows customizing the hash code of scalar UDFs. Similarly to [`Hash`] and [`Eq`],
705 /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same.
706 ///
707 /// By default, hashes [`Self::name`] and [`Self::signature`].
708 fn hash_value(&self) -> u64 {
709 let hasher = &mut DefaultHasher::new();
710 self.name().hash(hasher);
711 self.signature().hash(hasher);
712 hasher.finish()
713 }
714
715 /// Returns the documentation for this Scalar UDF.
716 ///
717 /// Documentation can be accessed programmatically as well as
718 /// generating publicly facing documentation.
719 fn documentation(&self) -> Option<&Documentation> {
720 None
721 }
722}
723
724/// ScalarUDF that adds an alias to the underlying function. It is better to
725/// implement [`ScalarUDFImpl`], which supports aliases, directly if possible.
726#[derive(Debug)]
727struct AliasedScalarUDFImpl {
728 inner: Arc<dyn ScalarUDFImpl>,
729 aliases: Vec<String>,
730}
731
732impl AliasedScalarUDFImpl {
733 pub fn new(
734 inner: Arc<dyn ScalarUDFImpl>,
735 new_aliases: impl IntoIterator<Item = &'static str>,
736 ) -> Self {
737 let mut aliases = inner.aliases().to_vec();
738 aliases.extend(new_aliases.into_iter().map(|s| s.to_string()));
739 Self { inner, aliases }
740 }
741}
742
743impl ScalarUDFImpl for AliasedScalarUDFImpl {
744 fn as_any(&self) -> &dyn Any {
745 self
746 }
747
748 fn name(&self) -> &str {
749 self.inner.name()
750 }
751
752 fn display_name(&self, args: &[Expr]) -> Result<String> {
753 self.inner.display_name(args)
754 }
755
756 fn schema_name(&self, args: &[Expr]) -> Result<String> {
757 self.inner.schema_name(args)
758 }
759
760 fn signature(&self) -> &Signature {
761 self.inner.signature()
762 }
763
764 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
765 self.inner.return_type(arg_types)
766 }
767
768 fn aliases(&self) -> &[String] {
769 &self.aliases
770 }
771
772 fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
773 self.inner.return_type_from_args(args)
774 }
775
776 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
777 self.inner.invoke_with_args(args)
778 }
779
780 fn simplify(
781 &self,
782 args: Vec<Expr>,
783 info: &dyn SimplifyInfo,
784 ) -> Result<ExprSimplifyResult> {
785 self.inner.simplify(args, info)
786 }
787
788 fn short_circuits(&self) -> bool {
789 self.inner.short_circuits()
790 }
791
792 fn evaluate_bounds(&self, input: &[&Interval]) -> Result<Interval> {
793 self.inner.evaluate_bounds(input)
794 }
795
796 fn propagate_constraints(
797 &self,
798 interval: &Interval,
799 inputs: &[&Interval],
800 ) -> Result<Option<Vec<Interval>>> {
801 self.inner.propagate_constraints(interval, inputs)
802 }
803
804 fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
805 self.inner.output_ordering(inputs)
806 }
807
808 fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
809 self.inner.preserves_lex_ordering(inputs)
810 }
811
812 fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
813 self.inner.coerce_types(arg_types)
814 }
815
816 fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
817 if let Some(other) = other.as_any().downcast_ref::<AliasedScalarUDFImpl>() {
818 self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases
819 } else {
820 false
821 }
822 }
823
824 fn hash_value(&self) -> u64 {
825 let hasher = &mut DefaultHasher::new();
826 self.inner.hash_value().hash(hasher);
827 self.aliases.hash(hasher);
828 hasher.finish()
829 }
830
831 fn documentation(&self) -> Option<&Documentation> {
832 self.inner.documentation()
833 }
834}
835
836// Scalar UDF doc sections for use in public documentation
837pub mod scalar_doc_sections {
838 use crate::DocSection;
839
840 pub fn doc_sections() -> Vec<DocSection> {
841 vec![
842 DOC_SECTION_MATH,
843 DOC_SECTION_CONDITIONAL,
844 DOC_SECTION_STRING,
845 DOC_SECTION_BINARY_STRING,
846 DOC_SECTION_REGEX,
847 DOC_SECTION_DATETIME,
848 DOC_SECTION_ARRAY,
849 DOC_SECTION_STRUCT,
850 DOC_SECTION_MAP,
851 DOC_SECTION_HASHING,
852 DOC_SECTION_UNION,
853 DOC_SECTION_OTHER,
854 ]
855 }
856
857 pub const fn doc_sections_const() -> &'static [DocSection] {
858 &[
859 DOC_SECTION_MATH,
860 DOC_SECTION_CONDITIONAL,
861 DOC_SECTION_STRING,
862 DOC_SECTION_BINARY_STRING,
863 DOC_SECTION_REGEX,
864 DOC_SECTION_DATETIME,
865 DOC_SECTION_ARRAY,
866 DOC_SECTION_STRUCT,
867 DOC_SECTION_MAP,
868 DOC_SECTION_HASHING,
869 DOC_SECTION_UNION,
870 DOC_SECTION_OTHER,
871 ]
872 }
873
874 pub const DOC_SECTION_MATH: DocSection = DocSection {
875 include: true,
876 label: "Math Functions",
877 description: None,
878 };
879
880 pub const DOC_SECTION_CONDITIONAL: DocSection = DocSection {
881 include: true,
882 label: "Conditional Functions",
883 description: None,
884 };
885
886 pub const DOC_SECTION_STRING: DocSection = DocSection {
887 include: true,
888 label: "String Functions",
889 description: None,
890 };
891
892 pub const DOC_SECTION_BINARY_STRING: DocSection = DocSection {
893 include: true,
894 label: "Binary String Functions",
895 description: None,
896 };
897
898 pub const DOC_SECTION_REGEX: DocSection = DocSection {
899 include: true,
900 label: "Regular Expression Functions",
901 description: Some(
902 r#"Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions)
903regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax)
904(minus support for several features including look-around and backreferences).
905The following regular expression functions are supported:"#,
906 ),
907 };
908
909 pub const DOC_SECTION_DATETIME: DocSection = DocSection {
910 include: true,
911 label: "Time and Date Functions",
912 description: None,
913 };
914
915 pub const DOC_SECTION_ARRAY: DocSection = DocSection {
916 include: true,
917 label: "Array Functions",
918 description: None,
919 };
920
921 pub const DOC_SECTION_STRUCT: DocSection = DocSection {
922 include: true,
923 label: "Struct Functions",
924 description: None,
925 };
926
927 pub const DOC_SECTION_MAP: DocSection = DocSection {
928 include: true,
929 label: "Map Functions",
930 description: None,
931 };
932
933 pub const DOC_SECTION_HASHING: DocSection = DocSection {
934 include: true,
935 label: "Hashing Functions",
936 description: None,
937 };
938
939 pub const DOC_SECTION_OTHER: DocSection = DocSection {
940 include: true,
941 label: "Other Functions",
942 description: None,
943 };
944
945 pub const DOC_SECTION_UNION: DocSection = DocSection {
946 include: true,
947 label: "Union Functions",
948 description: Some("Functions to work with the union data type, also know as tagged unions, variant types, enums or sum types. Note: Not related to the SQL UNION operator"),
949 };
950}