datafusion_expr_common/
signature.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Function signatures: [`Volatility`], [`Signature`] and [`TypeSignature`]
19
20use std::fmt::Display;
21use std::hash::Hash;
22
23use crate::type_coercion::aggregates::NUMERICS;
24use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
25use datafusion_common::internal_err;
26use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
27use datafusion_common::utils::ListCoercion;
28use indexmap::IndexSet;
29use itertools::Itertools;
30
/// Placeholder that stands for *any* valid timezone.
///
/// Use this where a function can accept a timestamp type with any valid
/// timezone. It exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";

/// Placeholder that stands for *any* valid fixed size list length.
///
/// Use this where a function can accept a fixed size list type with any
/// valid length. It exists to avoid the need to enumerate all possible
/// fixed size list lengths. The sentinel value is [`i32::MIN`].
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
45
/// How a function's output changes with respect to a fixed input.
///
/// The volatility of a function determines eligibility for certain
/// optimizations. You should always define your function to have the strictest
/// possible volatility to maximize performance and avoid unexpected
/// results.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// Always returns the same output when given the same input.
    ///
    /// DataFusion will inline immutable functions during planning.
    ///
    /// For example, the `abs` function is immutable, so `abs(-1)` will be
    /// evaluated and replaced with `1` during planning rather than invoking
    /// the function at runtime.
    Immutable,
    /// May return different values given the same input across different
    /// queries but must return the same value for a given input within a query.
    ///
    /// For example, the `now()` function is stable, because the query `select
    /// col1, now() from t1`, will return different results each time it is run,
    /// but within the same query, the output of the `now()` function has the
    /// same value for each output row.
    ///
    /// DataFusion will inline `Stable` functions when possible. For example,
    /// `Stable` functions are inlined when planning a query for execution, but
    /// not in View definitions or prepared statements.
    Stable,
    /// May change the return value from evaluation to evaluation.
    ///
    /// Multiple invocations of a volatile function may return different results
    /// when used in the same query on different rows. An example of this is the
    /// `random()` function.
    ///
    /// DataFusion cannot evaluate such functions during planning or push these
    /// predicates into scans. In the query `select col1, random() from t1`, the
    /// `random()` function will be evaluated for each output row, resulting in
    /// a unique random value for each row.
    Volatile,
}
87
/// The types of arguments for which a function has implementations.
///
/// [`TypeSignature`] **DOES NOT** define the types that a user query could call the
/// function with. DataFusion will automatically coerce (cast) argument types to
/// one of the supported function signatures, if possible.
///
/// # Overview
/// Functions typically provide implementations for a small number of different
/// argument [`DataType`]s, rather than all possible combinations. If a user
/// calls a function with arguments that do not match any of the declared types,
/// DataFusion will attempt to automatically coerce (add casts to) function
/// arguments so they match the [`TypeSignature`]. See the [`type_coercion`] module
/// for more details.
///
/// # Example: Numeric Functions
/// For example, a function like `cos` may only provide an implementation for
/// [`DataType::Float64`]. When users call `cos` with a different argument type,
/// such as `cos(int_column)`, type coercion automatically adds a cast such
/// as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// [`type_coercion`]: crate::type_coercion
///
/// # Example: Strings
///
/// There are several different string types in Arrow, such as
/// [`DataType::Utf8`], [`DataType::LargeUtf8`], and [`DataType::Utf8View`].
///
/// Some functions may have specialized implementations for these types, while others
/// may be able to handle only one of them. For example, a function that
/// only works with [`DataType::Utf8View`] would have the following signature:
///
/// ```
/// # use arrow::datatypes::DataType;
/// # use datafusion_expr_common::signature::{TypeSignature};
///  // Declares the function must be invoked with a single argument of type `Utf8View`.
///  // if a user calls the function with `Utf8` or `LargeUtf8`, DataFusion will
///  // automatically add a cast to `Utf8View` during planning.
///  let type_signature = TypeSignature::Exact(vec![DataType::Utf8View]);
///
/// ```
///
/// # Example: Timestamps
///
/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an
/// optional variable timezone specification. To specify that a function can handle a timestamp
/// with *ANY* timezone, use the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
///   // A nanosecond precision timestamp with ANY timezone
///   // matches  Timestamp(Nanosecond, Some("+0:00"))
///   // matches  Timestamp(Nanosecond, Some("+5:00"))
///   // does not match  Timestamp(Nanosecond, None)
///   DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
    /// One or more arguments of a common type out of a list of valid types.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    ///
    /// # Examples
    ///
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
    /// DataType::LargeUtf8])`
    Variadic(Vec<DataType>),
    /// The acceptable signature and coercion rules are special for this
    /// function.
    ///
    /// If this signature is specified,
    /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
    ///
    /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
    UserDefined,
    /// One or more arguments with arbitrary types
    VariadicAny,
    /// One or more arguments of an arbitrary but equal type out of a list of valid types.
    ///
    /// # Examples
    ///
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
    Uniform(usize, Vec<DataType>),
    /// One or more arguments with exactly the specified types in order.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Exact(Vec<DataType>),
    /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
    ///
    /// [`Coercion`] contains not only the desired type but also the allowed
    /// casts. For example, a function may expect a string argument but also
    /// allow that argument to be cast from a binary type.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    Coercible(Vec<Coercion>),
    /// One or more arguments coercible to a single, comparable type.
    ///
    /// Each argument will be coerced to a single type using the
    /// coercion rules described in [`comparison_coercion_numeric`].
    ///
    /// # Examples
    ///
    /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
    /// the types will both be coerced to `i64` before the function is invoked.
    ///
    /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
    /// the types will both be coerced to `Utf8` before the function is invoked.
    ///
    /// Note:
    /// - For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
    ///
    /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
    Comparable(usize),
    /// One or more arguments of arbitrary types.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Any(usize),
    /// Matches exactly one of a list of [`TypeSignature`]s.
    ///
    /// Coercion is attempted to match the signatures in order, and stops after
    /// the first success, if any.
    ///
    /// # Examples
    ///
    /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
    /// is `OneOf(vec![Any(0), VariadicAny])`.
    OneOf(Vec<TypeSignature>),
    /// A function that has an [`ArrayFunctionSignature`]
    ArraySignature(ArrayFunctionSignature),
    /// One or more arguments of numeric types.
    ///
    /// See [`NativeType::is_numeric`] to know which types are considered numeric.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    ///
    /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
    Numeric(usize),
    /// One or more arguments that all share the same string type.
    ///
    /// The precedence of types from high to low is Utf8View, LargeUtf8 and Utf8.
    /// Null is considered as `Utf8` by default.
    /// Dictionary with string value type is also handled.
    ///
    /// For example, if a function is called with (utf8, large_utf8), all
    /// arguments will be coerced to `LargeUtf8`
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    String(usize),
    /// No arguments
    Nullary,
}
243
244impl TypeSignature {
245    #[inline]
246    pub fn is_one_of(&self) -> bool {
247        matches!(self, TypeSignature::OneOf(_))
248    }
249}
250
/// Represents the class of types that can be used in a function signature.
///
/// This is used to specify what types are valid for function arguments in a more flexible way than
/// just listing specific DataTypes. For example, `TypeSignatureClass::Timestamp` matches any
/// timestamp type regardless of timezone or precision.
///
/// Used primarily with [`TypeSignature::Coercible`] to define function signatures that can accept
/// arguments that can be coerced to a particular class of types.
///
/// See [`TypeSignatureClass::matches_native_type`] for the exact matching rules; note that
/// `NativeType::Null` is accepted by every class.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
    /// Matches native types for which `NativeType::is_timestamp` returns `true`.
    Timestamp,
    /// Matches native types for which `NativeType::is_time` returns `true`.
    Time,
    /// Matches native types for which `NativeType::is_interval` returns `true`.
    Interval,
    /// Matches native types for which `NativeType::is_duration` returns `true`.
    Duration,
    /// Matches only the native type of the wrapped logical type.
    Native(LogicalTypeRef),
    // TODO:
    // Numeric
    /// Matches native types for which `NativeType::is_integer` returns `true`.
    Integer,
}
270
271impl Display for TypeSignatureClass {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        write!(f, "TypeSignatureClass::{self:?}")
274    }
275}
276
277impl TypeSignatureClass {
278    /// Get example acceptable types for this `TypeSignatureClass`
279    ///
280    /// This is used for `information_schema` and can be used to generate
281    /// documentation or error messages.
282    fn get_example_types(&self) -> Vec<DataType> {
283        match self {
284            TypeSignatureClass::Native(l) => get_data_types(l.native()),
285            TypeSignatureClass::Timestamp => {
286                vec![
287                    DataType::Timestamp(TimeUnit::Nanosecond, None),
288                    DataType::Timestamp(
289                        TimeUnit::Nanosecond,
290                        Some(TIMEZONE_WILDCARD.into()),
291                    ),
292                ]
293            }
294            TypeSignatureClass::Time => {
295                vec![DataType::Time64(TimeUnit::Nanosecond)]
296            }
297            TypeSignatureClass::Interval => {
298                vec![DataType::Interval(IntervalUnit::DayTime)]
299            }
300            TypeSignatureClass::Duration => {
301                vec![DataType::Duration(TimeUnit::Nanosecond)]
302            }
303            TypeSignatureClass::Integer => {
304                vec![DataType::Int64]
305            }
306        }
307    }
308
309    /// Does the specified `NativeType` match this type signature class?
310    pub fn matches_native_type(
311        self: &TypeSignatureClass,
312        logical_type: &NativeType,
313    ) -> bool {
314        if logical_type == &NativeType::Null {
315            return true;
316        }
317
318        match self {
319            TypeSignatureClass::Native(t) if t.native() == logical_type => true,
320            TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
321            TypeSignatureClass::Time if logical_type.is_time() => true,
322            TypeSignatureClass::Interval if logical_type.is_interval() => true,
323            TypeSignatureClass::Duration if logical_type.is_duration() => true,
324            TypeSignatureClass::Integer if logical_type.is_integer() => true,
325            _ => false,
326        }
327    }
328
329    /// What type would `origin_type` be casted to when casting to the specified native type?
330    pub fn default_casted_type(
331        &self,
332        native_type: &NativeType,
333        origin_type: &DataType,
334    ) -> datafusion_common::Result<DataType> {
335        match self {
336            TypeSignatureClass::Native(logical_type) => {
337                logical_type.native().default_cast_for(origin_type)
338            }
339            // If the given type is already a timestamp, we don't change the unit and timezone
340            TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
341                Ok(origin_type.to_owned())
342            }
343            TypeSignatureClass::Time if native_type.is_time() => {
344                Ok(origin_type.to_owned())
345            }
346            TypeSignatureClass::Interval if native_type.is_interval() => {
347                Ok(origin_type.to_owned())
348            }
349            TypeSignatureClass::Duration if native_type.is_duration() => {
350                Ok(origin_type.to_owned())
351            }
352            TypeSignatureClass::Integer if native_type.is_integer() => {
353                Ok(origin_type.to_owned())
354            }
355            _ => internal_err!("May miss the matching logic in `matches_native_type`"),
356        }
357    }
358}
359
/// Signatures for functions whose arguments are list-like arrays or maps.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
    /// A function takes at least one List/LargeList/FixedSizeList argument.
    Array {
        /// A full list of the arguments accepted by this function.
        arguments: Vec<ArrayFunctionArgument>,
        /// Additional information about how array arguments should be coerced.
        array_coercion: Option<ListCoercion>,
    },
    /// A function takes a single argument that must be a List/LargeList/FixedSizeList
    /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
    RecursiveArray,
    /// Specialized Signature for MapArray
    /// The function takes a single argument that must be a MapArray
    MapArray,
}
376
377impl Display for ArrayFunctionSignature {
378    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
379        match self {
380            ArrayFunctionSignature::Array { arguments, .. } => {
381                for (idx, argument) in arguments.iter().enumerate() {
382                    write!(f, "{argument}")?;
383                    if idx != arguments.len() - 1 {
384                        write!(f, ", ")?;
385                    }
386                }
387                Ok(())
388            }
389            ArrayFunctionSignature::RecursiveArray => {
390                write!(f, "recursive_array")
391            }
392            ArrayFunctionSignature::MapArray => {
393                write!(f, "map_array")
394            }
395        }
396    }
397}
398
/// The kind of a single argument in an [`ArrayFunctionSignature::Array`] signature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the Array's list
    /// dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
    /// to the same type.
    Array,
    /// A Utf8 argument.
    String,
}
412
413impl Display for ArrayFunctionArgument {
414    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
415        match self {
416            ArrayFunctionArgument::Element => {
417                write!(f, "element")
418            }
419            ArrayFunctionArgument::Index => {
420                write!(f, "index")
421            }
422            ArrayFunctionArgument::Array => {
423                write!(f, "array")
424            }
425            ArrayFunctionArgument::String => {
426                write!(f, "string")
427            }
428        }
429    }
430}
431
432impl TypeSignature {
433    pub fn to_string_repr(&self) -> Vec<String> {
434        match self {
435            TypeSignature::Nullary => {
436                vec!["NullAry()".to_string()]
437            }
438            TypeSignature::Variadic(types) => {
439                vec![format!("{}, ..", Self::join_types(types, "/"))]
440            }
441            TypeSignature::Uniform(arg_count, valid_types) => {
442                vec![
443                    std::iter::repeat_n(Self::join_types(valid_types, "/"), *arg_count)
444                        .collect::<Vec<String>>()
445                        .join(", "),
446                ]
447            }
448            TypeSignature::String(num) => {
449                vec![format!("String({num})")]
450            }
451            TypeSignature::Numeric(num) => {
452                vec![format!("Numeric({num})")]
453            }
454            TypeSignature::Comparable(num) => {
455                vec![format!("Comparable({num})")]
456            }
457            TypeSignature::Coercible(coercions) => {
458                vec![Self::join_types(coercions, ", ")]
459            }
460            TypeSignature::Exact(types) => {
461                vec![Self::join_types(types, ", ")]
462            }
463            TypeSignature::Any(arg_count) => {
464                vec![std::iter::repeat_n("Any", *arg_count)
465                    .collect::<Vec<&str>>()
466                    .join(", ")]
467            }
468            TypeSignature::UserDefined => {
469                vec!["UserDefined".to_string()]
470            }
471            TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
472            TypeSignature::OneOf(sigs) => {
473                sigs.iter().flat_map(|s| s.to_string_repr()).collect()
474            }
475            TypeSignature::ArraySignature(array_signature) => {
476                vec![array_signature.to_string()]
477            }
478        }
479    }
480
481    /// Helper function to join types with specified delimiter.
482    pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
483        types
484            .iter()
485            .map(|t| t.to_string())
486            .collect::<Vec<String>>()
487            .join(delimiter)
488    }
489
490    /// Check whether 0 input argument is valid for given `TypeSignature`
491    pub fn supports_zero_argument(&self) -> bool {
492        match &self {
493            TypeSignature::Exact(vec) => vec.is_empty(),
494            TypeSignature::Nullary => true,
495            TypeSignature::OneOf(types) => types
496                .iter()
497                .any(|type_sig| type_sig.supports_zero_argument()),
498            _ => false,
499        }
500    }
501
502    /// Returns true if the signature currently supports or used to supported 0
503    /// input arguments in a previous version of DataFusion.
504    pub fn used_to_support_zero_arguments(&self) -> bool {
505        match &self {
506            TypeSignature::Any(num) => *num == 0,
507            _ => self.supports_zero_argument(),
508        }
509    }
510
511    #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
512    pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
513        self.get_example_types()
514    }
515
516    /// Return example acceptable types for this `TypeSignature`'
517    ///
518    /// Returns a `Vec<DataType>` for each argument to the function
519    ///
520    /// This is used for `information_schema` and can be used to generate
521    /// documentation or error messages.
522    pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
523        match self {
524            TypeSignature::Exact(types) => vec![types.clone()],
525            TypeSignature::OneOf(types) => types
526                .iter()
527                .flat_map(|type_sig| type_sig.get_example_types())
528                .collect(),
529            TypeSignature::Uniform(arg_count, types) => types
530                .iter()
531                .cloned()
532                .map(|data_type| vec![data_type; *arg_count])
533                .collect(),
534            TypeSignature::Coercible(coercions) => coercions
535                .iter()
536                .map(|c| {
537                    let mut all_types: IndexSet<DataType> =
538                        c.desired_type().get_example_types().into_iter().collect();
539
540                    if let Some(implicit_coercion) = c.implicit_coercion() {
541                        let allowed_casts: Vec<DataType> = implicit_coercion
542                            .allowed_source_types
543                            .iter()
544                            .flat_map(|t| t.get_example_types())
545                            .collect();
546                        all_types.extend(allowed_casts);
547                    }
548
549                    all_types.into_iter().collect::<Vec<_>>()
550                })
551                .multi_cartesian_product()
552                .collect(),
553            TypeSignature::Variadic(types) => types
554                .iter()
555                .cloned()
556                .map(|data_type| vec![data_type])
557                .collect(),
558            TypeSignature::Numeric(arg_count) => NUMERICS
559                .iter()
560                .cloned()
561                .map(|numeric_type| vec![numeric_type; *arg_count])
562                .collect(),
563            TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
564                .into_iter()
565                .map(|dt| vec![dt; *arg_count])
566                .collect::<Vec<_>>(),
567            // TODO: Implement for other types
568            TypeSignature::Any(_)
569            | TypeSignature::Comparable(_)
570            | TypeSignature::Nullary
571            | TypeSignature::VariadicAny
572            | TypeSignature::ArraySignature(_)
573            | TypeSignature::UserDefined => vec![],
574        }
575    }
576}
577
578fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
579    match native_type {
580        NativeType::Null => vec![DataType::Null],
581        NativeType::Boolean => vec![DataType::Boolean],
582        NativeType::Int8 => vec![DataType::Int8],
583        NativeType::Int16 => vec![DataType::Int16],
584        NativeType::Int32 => vec![DataType::Int32],
585        NativeType::Int64 => vec![DataType::Int64],
586        NativeType::UInt8 => vec![DataType::UInt8],
587        NativeType::UInt16 => vec![DataType::UInt16],
588        NativeType::UInt32 => vec![DataType::UInt32],
589        NativeType::UInt64 => vec![DataType::UInt64],
590        NativeType::Float16 => vec![DataType::Float16],
591        NativeType::Float32 => vec![DataType::Float32],
592        NativeType::Float64 => vec![DataType::Float64],
593        NativeType::Date => vec![DataType::Date32, DataType::Date64],
594        NativeType::Binary => vec![
595            DataType::Binary,
596            DataType::LargeBinary,
597            DataType::BinaryView,
598        ],
599        NativeType::String => {
600            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
601        }
602        // TODO: support other native types
603        _ => vec![],
604    }
605}
606
/// Represents type coercion rules for function arguments, specifying both the desired type
/// and optional implicit coercion rules for source types.
///
/// There are two variants:
///
/// * `Exact` - Only accepts arguments that exactly match the desired type
/// * `Implicit` - Accepts the desired type and can coerce from specified source types
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
///
/// // Exact coercion that only accepts timestamp types
/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
///
/// // Implicit coercion that accepts string types but can coerce from binary types
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Native(logical_string()),
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::String
/// );
/// ```
#[derive(Debug, Clone, Eq, PartialOrd)]
pub enum Coercion {
    /// Coercion that only accepts arguments exactly matching the desired type.
    Exact {
        /// The required type for the argument
        desired_type: TypeSignatureClass,
    },

    /// Coercion that accepts the desired type and can implicitly coerce from other types.
    Implicit {
        /// The primary desired type for the argument
        desired_type: TypeSignatureClass,
        /// Rules for implicit coercion from other types
        implicit_coercion: ImplicitCoercion,
    },
}
647
648impl Coercion {
649    pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
650        Self::Exact { desired_type }
651    }
652
653    /// Create a new coercion with implicit coercion rules.
654    ///
655    /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
656    /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
657    pub fn new_implicit(
658        desired_type: TypeSignatureClass,
659        allowed_source_types: Vec<TypeSignatureClass>,
660        default_casted_type: NativeType,
661    ) -> Self {
662        Self::Implicit {
663            desired_type,
664            implicit_coercion: ImplicitCoercion {
665                allowed_source_types,
666                default_casted_type,
667            },
668        }
669    }
670
671    pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
672        match self {
673            Coercion::Exact { .. } => &[],
674            Coercion::Implicit {
675                implicit_coercion, ..
676            } => implicit_coercion.allowed_source_types.as_slice(),
677        }
678    }
679
680    pub fn default_casted_type(&self) -> Option<&NativeType> {
681        match self {
682            Coercion::Exact { .. } => None,
683            Coercion::Implicit {
684                implicit_coercion, ..
685            } => Some(&implicit_coercion.default_casted_type),
686        }
687    }
688
689    pub fn desired_type(&self) -> &TypeSignatureClass {
690        match self {
691            Coercion::Exact { desired_type } => desired_type,
692            Coercion::Implicit { desired_type, .. } => desired_type,
693        }
694    }
695
696    pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
697        match self {
698            Coercion::Exact { .. } => None,
699            Coercion::Implicit {
700                implicit_coercion, ..
701            } => Some(implicit_coercion),
702        }
703    }
704}
705
706impl Display for Coercion {
707    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
708        write!(f, "Coercion({}", self.desired_type())?;
709        if let Some(implicit_coercion) = self.implicit_coercion() {
710            write!(f, ", implicit_coercion={implicit_coercion}",)
711        } else {
712            write!(f, ")")
713        }
714    }
715}
716
717impl PartialEq for Coercion {
718    fn eq(&self, other: &Self) -> bool {
719        self.desired_type() == other.desired_type()
720            && self.implicit_coercion() == other.implicit_coercion()
721    }
722}
723
724impl Hash for Coercion {
725    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
726        self.desired_type().hash(state);
727        self.implicit_coercion().hash(state);
728    }
729}
730
/// Defines rules for implicit type coercion, specifying which source types can be
/// coerced and the default type to use when coercing.
///
/// This is used by functions to specify which types they can accept via implicit
/// coercion in addition to their primary desired type.
///
/// Both fields are private; instances are created through
/// [`Coercion::new_implicit`].
///
/// # Examples
///
/// ```
/// use arrow::datatypes::TimeUnit;
///
/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary};
///
/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Timestamp,
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::Timestamp(TimeUnit::Second, None),
/// );
/// ```
#[derive(Debug, Clone, Eq, PartialOrd)]
pub struct ImplicitCoercion {
    /// The type classes that may be implicitly cast to the desired type.
    allowed_source_types: Vec<TypeSignatureClass>,

    /// The default type to use when coercing from allowed source types.
    /// This is particularly important for types like Timestamp that have multiple
    /// possible configurations (different time units and timezones).
    default_casted_type: NativeType,
}
762
763impl Display for ImplicitCoercion {
764    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
765        write!(
766            f,
767            "ImplicitCoercion({:?}, default_type={:?})",
768            self.allowed_source_types, self.default_casted_type
769        )
770    }
771}
772
773impl PartialEq for ImplicitCoercion {
774    fn eq(&self, other: &Self) -> bool {
775        self.allowed_source_types == other.allowed_source_types
776            && self.default_casted_type == other.default_casted_type
777    }
778}
779
780impl Hash for ImplicitCoercion {
781    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
782        self.allowed_source_types.hash(state);
783        self.default_casted_type.hash(state);
784    }
785}
786
787/// Provides  information necessary for calling a function.
788///
789/// - [`TypeSignature`] defines the argument types that a function has implementations
790///   for.
791///
792/// - [`Volatility`] defines how the output of the function changes with the input.
793#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
794pub struct Signature {
795    /// The data types that the function accepts. See [TypeSignature] for more information.
796    pub type_signature: TypeSignature,
797    /// The volatility of the function. See [Volatility] for more information.
798    pub volatility: Volatility,
799}
800
801impl Signature {
802    /// Creates a new Signature from a given type signature and volatility.
803    pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
804        Signature {
805            type_signature,
806            volatility,
807        }
808    }
809    /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
810    pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
811        Self {
812            type_signature: TypeSignature::Variadic(common_types),
813            volatility,
814        }
815    }
816    /// User-defined coercion rules for the function.
817    pub fn user_defined(volatility: Volatility) -> Self {
818        Self {
819            type_signature: TypeSignature::UserDefined,
820            volatility,
821        }
822    }
823
824    /// A specified number of numeric arguments
825    pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
826        Self {
827            type_signature: TypeSignature::Numeric(arg_count),
828            volatility,
829        }
830    }
831
832    /// A specified number of numeric arguments
833    pub fn string(arg_count: usize, volatility: Volatility) -> Self {
834        Self {
835            type_signature: TypeSignature::String(arg_count),
836            volatility,
837        }
838    }
839
840    /// An arbitrary number of arguments of any type.
841    pub fn variadic_any(volatility: Volatility) -> Self {
842        Self {
843            type_signature: TypeSignature::VariadicAny,
844            volatility,
845        }
846    }
847    /// A fixed number of arguments of the same type, from those listed in `valid_types`.
848    pub fn uniform(
849        arg_count: usize,
850        valid_types: Vec<DataType>,
851        volatility: Volatility,
852    ) -> Self {
853        Self {
854            type_signature: TypeSignature::Uniform(arg_count, valid_types),
855            volatility,
856        }
857    }
858    /// Exactly matches the types in `exact_types`, in order.
859    pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
860        Signature {
861            type_signature: TypeSignature::Exact(exact_types),
862            volatility,
863        }
864    }
865
866    /// Target coerce types in order
867    pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
868        Self {
869            type_signature: TypeSignature::Coercible(target_types),
870            volatility,
871        }
872    }
873
874    /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
875    pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
876        Self {
877            type_signature: TypeSignature::Comparable(arg_count),
878            volatility,
879        }
880    }
881
882    pub fn nullary(volatility: Volatility) -> Self {
883        Signature {
884            type_signature: TypeSignature::Nullary,
885            volatility,
886        }
887    }
888
889    /// A specified number of arguments of any type
890    pub fn any(arg_count: usize, volatility: Volatility) -> Self {
891        Signature {
892            type_signature: TypeSignature::Any(arg_count),
893            volatility,
894        }
895    }
896
897    /// Any one of a list of [TypeSignature]s.
898    pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
899        Signature {
900            type_signature: TypeSignature::OneOf(type_signatures),
901            volatility,
902        }
903    }
904
905    /// Specialized [Signature] for ArrayAppend and similar functions.
906    pub fn array_and_element(volatility: Volatility) -> Self {
907        Signature {
908            type_signature: TypeSignature::ArraySignature(
909                ArrayFunctionSignature::Array {
910                    arguments: vec![
911                        ArrayFunctionArgument::Array,
912                        ArrayFunctionArgument::Element,
913                    ],
914                    array_coercion: Some(ListCoercion::FixedSizedListToList),
915                },
916            ),
917            volatility,
918        }
919    }
920
921    /// Specialized [Signature] for ArrayPrepend and similar functions.
922    pub fn element_and_array(volatility: Volatility) -> Self {
923        Signature {
924            type_signature: TypeSignature::ArraySignature(
925                ArrayFunctionSignature::Array {
926                    arguments: vec![
927                        ArrayFunctionArgument::Element,
928                        ArrayFunctionArgument::Array,
929                    ],
930                    array_coercion: Some(ListCoercion::FixedSizedListToList),
931                },
932            ),
933            volatility,
934        }
935    }
936
937    /// Specialized [Signature] for functions that take a fixed number of arrays.
938    pub fn arrays(
939        n: usize,
940        coercion: Option<ListCoercion>,
941        volatility: Volatility,
942    ) -> Self {
943        Signature {
944            type_signature: TypeSignature::ArraySignature(
945                ArrayFunctionSignature::Array {
946                    arguments: vec![ArrayFunctionArgument::Array; n],
947                    array_coercion: coercion,
948                },
949            ),
950            volatility,
951        }
952    }
953
954    /// Specialized [Signature] for Array functions with an optional index.
955    pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
956        Signature {
957            type_signature: TypeSignature::OneOf(vec![
958                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
959                    arguments: vec![
960                        ArrayFunctionArgument::Array,
961                        ArrayFunctionArgument::Element,
962                    ],
963                    array_coercion: None,
964                }),
965                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
966                    arguments: vec![
967                        ArrayFunctionArgument::Array,
968                        ArrayFunctionArgument::Element,
969                        ArrayFunctionArgument::Index,
970                    ],
971                    array_coercion: None,
972                }),
973            ]),
974            volatility,
975        }
976    }
977
978    /// Specialized [Signature] for ArrayElement and similar functions.
979    pub fn array_and_index(volatility: Volatility) -> Self {
980        Signature {
981            type_signature: TypeSignature::ArraySignature(
982                ArrayFunctionSignature::Array {
983                    arguments: vec![
984                        ArrayFunctionArgument::Array,
985                        ArrayFunctionArgument::Index,
986                    ],
987                    array_coercion: Some(ListCoercion::FixedSizedListToList),
988                },
989            ),
990            volatility,
991        }
992    }
993
994    /// Specialized [Signature] for ArrayEmpty and similar functions.
995    pub fn array(volatility: Volatility) -> Self {
996        Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility)
997    }
998}
999
#[cfg(test)]
mod tests {
    use super::*;
    use datafusion_common::types::{logical_int64, logical_string};

    /// Expected examples for two-argument signatures whose arguments always
    /// share a type: one `[t, t]` entry per `t` in `types`, in order.
    fn same_type_pairs(types: Vec<DataType>) -> Vec<Vec<DataType>> {
        types.into_iter().map(|ty| vec![ty.clone(), ty]).collect()
    }

    #[test]
    fn supports_zero_argument_tests() {
        // `TypeSignature`s that accept a call with zero arguments.
        let positive_cases = [
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        for case in &positive_cases {
            assert!(
                case.supports_zero_argument(),
                "Expected {case:?} to support zero arguments"
            );
        }

        // `TypeSignature`s that reject a call with zero arguments.
        let negative_cases = [
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        for case in &negative_cases {
            assert!(
                !case.supports_zero_argument(),
                "Expected {case:?} not to support zero arguments"
            );
        }
    }

    #[test]
    fn type_signature_partial_ord() {
        // Checks that a partial order is defined on `TypeSignature`.
        let user_defined = TypeSignature::UserDefined;
        assert!(user_defined < TypeSignature::VariadicAny);
        assert!(user_defined < TypeSignature::Any(1));

        let uniform_null =
            |count: usize| TypeSignature::Uniform(count, vec![DataType::Null]);
        assert!(uniform_null(1) < TypeSignature::Uniform(1, vec![DataType::Boolean]));
        assert!(uniform_null(1) < uniform_null(2));
        assert!(uniform_null(usize::MAX) < TypeSignature::Exact(vec![DataType::Null]));
    }

    #[test]
    fn test_get_possible_types() {
        let int_pair = vec![DataType::Int32, DataType::Int64];
        let float_pair = vec![DataType::Float32, DataType::Float64];

        // Exact: the single listed combination.
        assert_eq!(
            TypeSignature::Exact(int_pair.clone()).get_example_types(),
            vec![int_pair.clone()]
        );

        // OneOf: one entry per alternative, in declaration order.
        assert_eq!(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(int_pair.clone()),
                TypeSignature::Exact(float_pair.clone()),
            ])
            .get_example_types(),
            vec![int_pair.clone(), float_pair.clone()]
        );
        assert_eq!(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(int_pair.clone()),
                TypeSignature::Exact(float_pair.clone()),
                TypeSignature::Exact(vec![DataType::Utf8]),
            ])
            .get_example_types(),
            vec![int_pair.clone(), float_pair, vec![DataType::Utf8]]
        );

        // Uniform: every argument takes the same type from the valid list.
        assert_eq!(
            TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64])
                .get_example_types(),
            same_type_pairs(vec![DataType::Float32, DataType::Int64])
        );

        // Coercible: string types for the first argument, Int64 for the second.
        let coercible = TypeSignature::Coercible(vec![
            Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
            Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
        ]);
        assert_eq!(
            coercible.get_example_types(),
            vec![
                vec![DataType::Utf8, DataType::Int64],
                vec![DataType::LargeUtf8, DataType::Int64],
                vec![DataType::Utf8View, DataType::Int64],
            ]
        );

        // Variadic: one single-argument example per common type.
        assert_eq!(
            TypeSignature::Variadic(int_pair).get_example_types(),
            vec![vec![DataType::Int32], vec![DataType::Int64]]
        );

        // Numeric: each numeric type repeated for both arguments.
        assert_eq!(
            TypeSignature::Numeric(2).get_example_types(),
            same_type_pairs(vec![
                DataType::Int8,
                DataType::Int16,
                DataType::Int32,
                DataType::Int64,
                DataType::UInt8,
                DataType::UInt16,
                DataType::UInt32,
                DataType::UInt64,
                DataType::Float32,
                DataType::Float64,
            ])
        );

        // String: each string type repeated for both arguments.
        assert_eq!(
            TypeSignature::String(2).get_example_types(),
            same_type_pairs(vec![
                DataType::Utf8,
                DataType::LargeUtf8,
                DataType::Utf8View,
            ])
        );
    }
}