datafusion_expr_common/
signature.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Signature module contains foundational types that are used to represent signatures, types,
19//! and return types of functions in DataFusion.
20
21use std::fmt::Display;
22use std::hash::Hash;
23
24use crate::type_coercion::aggregates::NUMERICS;
25use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
26use datafusion_common::internal_err;
27use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
28use datafusion_common::utils::ListCoercion;
29use indexmap::IndexSet;
30use itertools::Itertools;
31
/// Constant that is used as a placeholder for any valid timezone.
///
/// This is used where a function can accept a timestamp type with any
/// valid timezone. It exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
41
/// Constant that is used as a placeholder for any valid fixed size list.
///
/// This is used where a function can accept a fixed size list type with any
/// valid length. It exists to avoid the need to enumerate all possible fixed size list lengths.
/// The placeholder value is `i32::MIN`.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
46
/// A function's volatility, which defines the function's eligibility for certain optimizations.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// An immutable function will always return the same output when given the same
    /// input. DataFusion will attempt to inline immutable functions during planning.
    Immutable,
    /// A stable function may return different values given the same input across different
    /// queries but must return the same value for a given input within a query. An example of
    /// this is the `Now` function. DataFusion will attempt to inline `Stable` functions
    /// during planning, when possible.
    ///
    /// For the query `select col1, now() from t1`, execution might take a while, but the
    /// `now()` column will be the same for each output row because it is evaluated
    /// during planning.
    Stable,
    /// A volatile function may change the return value from evaluation to evaluation.
    /// Multiple invocations of a volatile function may return different results when used in the
    /// same query. An example of this is the `random()` function. DataFusion
    /// cannot evaluate such functions during planning.
    ///
    /// In the query `select col1, random() from t1`, the `random()` function will be evaluated
    /// for each output row, resulting in a unique random value for each row.
    Volatile,
}
69
/// A function's type signature defines the types of arguments the function supports.
///
/// Functions typically support only a few different types of arguments compared to the
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
/// automatically coerces (adds casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// # Data Types
///
/// ## Timestamps
///
/// Types to match are represented using Arrow's [`DataType`].  [`DataType::Timestamp`] has an optional variable
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
/// the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
///   // A nanosecond precision timestamp with ANY timezone
///   // matches  Timestamp(Nanosecond, Some("+0:00"))
///   // matches  Timestamp(Nanosecond, Some("+5:00"))
///   // does not match  Timestamp(Nanosecond, None)
///   DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
    /// One or more arguments of a common type out of a list of valid types.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    ///
    /// # Examples
    ///
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
    /// DataType::LargeUtf8])`
    Variadic(Vec<DataType>),
    /// The acceptable signature and coercion rules are special for this
    /// function.
    ///
    /// If this signature is specified,
    /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
    ///
    /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
    UserDefined,
    /// One or more arguments with arbitrary types
    VariadicAny,
    /// One or more arguments of an arbitrary but equal type out of a list of valid types.
    ///
    /// # Examples
    ///
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
    Uniform(usize, Vec<DataType>),
    /// One or more arguments with exactly the specified types in order.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Exact(Vec<DataType>),
    /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
    ///
    /// [`Coercion`] contains not only the desired type but also the allowed casts.
    /// For example, a function may expect a string type but also allow its argument
    /// to be cast from a binary type.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    Coercible(Vec<Coercion>),
    /// One or more arguments coercible to a single, comparable type.
    ///
    /// Each argument will be coerced to a single type using the
    /// coercion rules described in [`comparison_coercion_numeric`].
    ///
    /// # Examples
    ///
    /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
    /// the types will both be coerced to `i64` before the function is invoked.
    ///
    /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
    /// the types will both be coerced to `Utf8` before the function is invoked.
    ///
    /// Note:
    /// - For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
    ///
    /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
    Comparable(usize),
    /// One or more arguments of arbitrary types.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Any(usize),
    /// Matches exactly one of a list of [`TypeSignature`]s.
    ///
    /// Coercion is attempted to match the signatures in order, and stops after
    /// the first success, if any.
    ///
    /// # Examples
    ///
    /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
    /// is `OneOf(vec![Any(0), VariadicAny])`.
    OneOf(Vec<TypeSignature>),
    /// A function that has an [`ArrayFunctionSignature`]
    ArraySignature(ArrayFunctionSignature),
    /// One or more arguments of numeric types.
    ///
    /// See [`NativeType::is_numeric`] to know which types are considered numeric.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    ///
    /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
    Numeric(usize),
    /// One or more arguments of all the same string types.
    ///
    /// The precedence of type from high to low is Utf8View, LargeUtf8 and Utf8.
    /// Null is considered as `Utf8` by default.
    /// Dictionary with string value type is also handled.
    ///
    /// For example, if a function is called with (utf8, large_utf8), all
    /// arguments will be coerced to `LargeUtf8`.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    String(usize),
    /// No arguments
    Nullary,
}
195
196impl TypeSignature {
197    #[inline]
198    pub fn is_one_of(&self) -> bool {
199        matches!(self, TypeSignature::OneOf(_))
200    }
201}
202
/// Represents the class of types that can be used in a function signature.
///
/// This is used to specify what types are valid for function arguments in a more flexible way than
/// just listing specific DataTypes. For example, TypeSignatureClass::Timestamp matches any timestamp
/// type regardless of timezone or precision.
///
/// Used primarily with TypeSignature::Coercible to define function signatures that can accept
/// arguments that can be coerced to a particular class of types.
///
/// Note that `matches_native_type` treats `NativeType::Null` as matching every class.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
    /// Matches native types for which `NativeType::is_timestamp` returns true.
    Timestamp,
    /// Matches native types for which `NativeType::is_time` returns true.
    Time,
    /// Matches native types for which `NativeType::is_interval` returns true.
    Interval,
    /// Matches native types for which `NativeType::is_duration` returns true.
    Duration,
    /// Matches a native type equal to the native type of the wrapped logical type.
    Native(LogicalTypeRef),
    // TODO:
    // Numeric
    /// Matches native types for which `NativeType::is_integer` returns true.
    Integer,
}
222
223impl Display for TypeSignatureClass {
224    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
225        write!(f, "TypeSignatureClass::{self:?}")
226    }
227}
228
229impl TypeSignatureClass {
230    /// Get example acceptable types for this `TypeSignatureClass`
231    ///
232    /// This is used for `information_schema` and can be used to generate
233    /// documentation or error messages.
234    fn get_example_types(&self) -> Vec<DataType> {
235        match self {
236            TypeSignatureClass::Native(l) => get_data_types(l.native()),
237            TypeSignatureClass::Timestamp => {
238                vec![
239                    DataType::Timestamp(TimeUnit::Nanosecond, None),
240                    DataType::Timestamp(
241                        TimeUnit::Nanosecond,
242                        Some(TIMEZONE_WILDCARD.into()),
243                    ),
244                ]
245            }
246            TypeSignatureClass::Time => {
247                vec![DataType::Time64(TimeUnit::Nanosecond)]
248            }
249            TypeSignatureClass::Interval => {
250                vec![DataType::Interval(IntervalUnit::DayTime)]
251            }
252            TypeSignatureClass::Duration => {
253                vec![DataType::Duration(TimeUnit::Nanosecond)]
254            }
255            TypeSignatureClass::Integer => {
256                vec![DataType::Int64]
257            }
258        }
259    }
260
261    /// Does the specified `NativeType` match this type signature class?
262    pub fn matches_native_type(
263        self: &TypeSignatureClass,
264        logical_type: &NativeType,
265    ) -> bool {
266        if logical_type == &NativeType::Null {
267            return true;
268        }
269
270        match self {
271            TypeSignatureClass::Native(t) if t.native() == logical_type => true,
272            TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
273            TypeSignatureClass::Time if logical_type.is_time() => true,
274            TypeSignatureClass::Interval if logical_type.is_interval() => true,
275            TypeSignatureClass::Duration if logical_type.is_duration() => true,
276            TypeSignatureClass::Integer if logical_type.is_integer() => true,
277            _ => false,
278        }
279    }
280
281    /// What type would `origin_type` be casted to when casting to the specified native type?
282    pub fn default_casted_type(
283        &self,
284        native_type: &NativeType,
285        origin_type: &DataType,
286    ) -> datafusion_common::Result<DataType> {
287        match self {
288            TypeSignatureClass::Native(logical_type) => {
289                logical_type.native().default_cast_for(origin_type)
290            }
291            // If the given type is already a timestamp, we don't change the unit and timezone
292            TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
293                Ok(origin_type.to_owned())
294            }
295            TypeSignatureClass::Time if native_type.is_time() => {
296                Ok(origin_type.to_owned())
297            }
298            TypeSignatureClass::Interval if native_type.is_interval() => {
299                Ok(origin_type.to_owned())
300            }
301            TypeSignatureClass::Duration if native_type.is_duration() => {
302                Ok(origin_type.to_owned())
303            }
304            TypeSignatureClass::Integer if native_type.is_integer() => {
305                Ok(origin_type.to_owned())
306            }
307            _ => internal_err!("May miss the matching logic in `matches_native_type`"),
308        }
309    }
310}
311
/// Describes the array-related argument shapes a function accepts.
///
/// Carried by the `ArraySignature` variant of `TypeSignature`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
    /// A function takes at least one List/LargeList/FixedSizeList argument.
    Array {
        /// A full list of the arguments accepted by this function.
        arguments: Vec<ArrayFunctionArgument>,
        /// Additional information about how array arguments should be coerced.
        array_coercion: Option<ListCoercion>,
    },
    /// A function takes a single argument that must be a List/LargeList/FixedSizeList
    /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
    RecursiveArray,
    /// Specialized Signature for MapArray
    /// The function takes a single argument that must be a MapArray
    MapArray,
}
328
329impl Display for ArrayFunctionSignature {
330    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331        match self {
332            ArrayFunctionSignature::Array { arguments, .. } => {
333                for (idx, argument) in arguments.iter().enumerate() {
334                    write!(f, "{argument}")?;
335                    if idx != arguments.len() - 1 {
336                        write!(f, ", ")?;
337                    }
338                }
339                Ok(())
340            }
341            ArrayFunctionSignature::RecursiveArray => {
342                write!(f, "recursive_array")
343            }
344            ArrayFunctionSignature::MapArray => {
345                write!(f, "map_array")
346            }
347        }
348    }
349}
350
/// The role a single argument plays in an array function signature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the Array's list
    /// dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
    /// to the same type.
    Array,
}

/// Renders the argument role as its lowercase name (`element`, `index` or `array`).
impl Display for ArrayFunctionArgument {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Element => "element",
            Self::Index => "index",
            Self::Array => "array",
        };
        f.write_str(label)
    }
}
378
379impl TypeSignature {
380    pub fn to_string_repr(&self) -> Vec<String> {
381        match self {
382            TypeSignature::Nullary => {
383                vec!["NullAry()".to_string()]
384            }
385            TypeSignature::Variadic(types) => {
386                vec![format!("{}, ..", Self::join_types(types, "/"))]
387            }
388            TypeSignature::Uniform(arg_count, valid_types) => {
389                vec![std::iter::repeat(Self::join_types(valid_types, "/"))
390                    .take(*arg_count)
391                    .collect::<Vec<String>>()
392                    .join(", ")]
393            }
394            TypeSignature::String(num) => {
395                vec![format!("String({num})")]
396            }
397            TypeSignature::Numeric(num) => {
398                vec![format!("Numeric({num})")]
399            }
400            TypeSignature::Comparable(num) => {
401                vec![format!("Comparable({num})")]
402            }
403            TypeSignature::Coercible(coercions) => {
404                vec![Self::join_types(coercions, ", ")]
405            }
406            TypeSignature::Exact(types) => {
407                vec![Self::join_types(types, ", ")]
408            }
409            TypeSignature::Any(arg_count) => {
410                vec![std::iter::repeat("Any")
411                    .take(*arg_count)
412                    .collect::<Vec<&str>>()
413                    .join(", ")]
414            }
415            TypeSignature::UserDefined => {
416                vec!["UserDefined".to_string()]
417            }
418            TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
419            TypeSignature::OneOf(sigs) => {
420                sigs.iter().flat_map(|s| s.to_string_repr()).collect()
421            }
422            TypeSignature::ArraySignature(array_signature) => {
423                vec![array_signature.to_string()]
424            }
425        }
426    }
427
428    /// Helper function to join types with specified delimiter.
429    pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
430        types
431            .iter()
432            .map(|t| t.to_string())
433            .collect::<Vec<String>>()
434            .join(delimiter)
435    }
436
437    /// Check whether 0 input argument is valid for given `TypeSignature`
438    pub fn supports_zero_argument(&self) -> bool {
439        match &self {
440            TypeSignature::Exact(vec) => vec.is_empty(),
441            TypeSignature::Nullary => true,
442            TypeSignature::OneOf(types) => types
443                .iter()
444                .any(|type_sig| type_sig.supports_zero_argument()),
445            _ => false,
446        }
447    }
448
449    /// Returns true if the signature currently supports or used to supported 0
450    /// input arguments in a previous version of DataFusion.
451    pub fn used_to_support_zero_arguments(&self) -> bool {
452        match &self {
453            TypeSignature::Any(num) => *num == 0,
454            _ => self.supports_zero_argument(),
455        }
456    }
457
458    #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
459    pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
460        self.get_example_types()
461    }
462
463    /// Return example acceptable types for this `TypeSignature`'
464    ///
465    /// Returns a `Vec<DataType>` for each argument to the function
466    ///
467    /// This is used for `information_schema` and can be used to generate
468    /// documentation or error messages.
469    pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
470        match self {
471            TypeSignature::Exact(types) => vec![types.clone()],
472            TypeSignature::OneOf(types) => types
473                .iter()
474                .flat_map(|type_sig| type_sig.get_example_types())
475                .collect(),
476            TypeSignature::Uniform(arg_count, types) => types
477                .iter()
478                .cloned()
479                .map(|data_type| vec![data_type; *arg_count])
480                .collect(),
481            TypeSignature::Coercible(coercions) => coercions
482                .iter()
483                .map(|c| {
484                    let mut all_types: IndexSet<DataType> =
485                        c.desired_type().get_example_types().into_iter().collect();
486
487                    if let Some(implicit_coercion) = c.implicit_coercion() {
488                        let allowed_casts: Vec<DataType> = implicit_coercion
489                            .allowed_source_types
490                            .iter()
491                            .flat_map(|t| t.get_example_types())
492                            .collect();
493                        all_types.extend(allowed_casts);
494                    }
495
496                    all_types.into_iter().collect::<Vec<_>>()
497                })
498                .multi_cartesian_product()
499                .collect(),
500            TypeSignature::Variadic(types) => types
501                .iter()
502                .cloned()
503                .map(|data_type| vec![data_type])
504                .collect(),
505            TypeSignature::Numeric(arg_count) => NUMERICS
506                .iter()
507                .cloned()
508                .map(|numeric_type| vec![numeric_type; *arg_count])
509                .collect(),
510            TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
511                .into_iter()
512                .map(|dt| vec![dt; *arg_count])
513                .collect::<Vec<_>>(),
514            // TODO: Implement for other types
515            TypeSignature::Any(_)
516            | TypeSignature::Comparable(_)
517            | TypeSignature::Nullary
518            | TypeSignature::VariadicAny
519            | TypeSignature::ArraySignature(_)
520            | TypeSignature::UserDefined => vec![],
521        }
522    }
523}
524
525fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
526    match native_type {
527        NativeType::Null => vec![DataType::Null],
528        NativeType::Boolean => vec![DataType::Boolean],
529        NativeType::Int8 => vec![DataType::Int8],
530        NativeType::Int16 => vec![DataType::Int16],
531        NativeType::Int32 => vec![DataType::Int32],
532        NativeType::Int64 => vec![DataType::Int64],
533        NativeType::UInt8 => vec![DataType::UInt8],
534        NativeType::UInt16 => vec![DataType::UInt16],
535        NativeType::UInt32 => vec![DataType::UInt32],
536        NativeType::UInt64 => vec![DataType::UInt64],
537        NativeType::Float16 => vec![DataType::Float16],
538        NativeType::Float32 => vec![DataType::Float32],
539        NativeType::Float64 => vec![DataType::Float64],
540        NativeType::Date => vec![DataType::Date32, DataType::Date64],
541        NativeType::Binary => vec![
542            DataType::Binary,
543            DataType::LargeBinary,
544            DataType::BinaryView,
545        ],
546        NativeType::String => {
547            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
548        }
549        // TODO: support other native types
550        _ => vec![],
551    }
552}
553
/// Represents type coercion rules for function arguments, specifying both the desired type
/// and optional implicit coercion rules for source types.
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
///
/// // Exact coercion that only accepts timestamp types
/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
///
/// // Implicit coercion that accepts string types but can coerce from binary types
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Native(logical_string()),
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::String
/// );
/// ```
///
/// There are two variants:
///
/// * `Exact` - Only accepts arguments that exactly match the desired type
/// * `Implicit` - Accepts the desired type and can coerce from specified source types
///
/// `PartialEq` and `Hash` are implemented by hand in terms of the desired type
/// and the optional implicit coercion rules.
#[derive(Debug, Clone, Eq, PartialOrd)]
pub enum Coercion {
    /// Coercion that only accepts arguments exactly matching the desired type.
    Exact {
        /// The required type for the argument
        desired_type: TypeSignatureClass,
    },

    /// Coercion that accepts the desired type and can implicitly coerce from other types.
    Implicit {
        /// The primary desired type for the argument
        desired_type: TypeSignatureClass,
        /// Rules for implicit coercion from other types
        implicit_coercion: ImplicitCoercion,
    },
}
594
595impl Coercion {
596    pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
597        Self::Exact { desired_type }
598    }
599
600    /// Create a new coercion with implicit coercion rules.
601    ///
602    /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
603    /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
604    pub fn new_implicit(
605        desired_type: TypeSignatureClass,
606        allowed_source_types: Vec<TypeSignatureClass>,
607        default_casted_type: NativeType,
608    ) -> Self {
609        Self::Implicit {
610            desired_type,
611            implicit_coercion: ImplicitCoercion {
612                allowed_source_types,
613                default_casted_type,
614            },
615        }
616    }
617
618    pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
619        match self {
620            Coercion::Exact { .. } => &[],
621            Coercion::Implicit {
622                implicit_coercion, ..
623            } => implicit_coercion.allowed_source_types.as_slice(),
624        }
625    }
626
627    pub fn default_casted_type(&self) -> Option<&NativeType> {
628        match self {
629            Coercion::Exact { .. } => None,
630            Coercion::Implicit {
631                implicit_coercion, ..
632            } => Some(&implicit_coercion.default_casted_type),
633        }
634    }
635
636    pub fn desired_type(&self) -> &TypeSignatureClass {
637        match self {
638            Coercion::Exact { desired_type } => desired_type,
639            Coercion::Implicit { desired_type, .. } => desired_type,
640        }
641    }
642
643    pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
644        match self {
645            Coercion::Exact { .. } => None,
646            Coercion::Implicit {
647                implicit_coercion, ..
648            } => Some(implicit_coercion),
649        }
650    }
651}
652
653impl Display for Coercion {
654    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
655        write!(f, "Coercion({}", self.desired_type())?;
656        if let Some(implicit_coercion) = self.implicit_coercion() {
657            write!(f, ", implicit_coercion={implicit_coercion}",)
658        } else {
659            write!(f, ")")
660        }
661    }
662}
663
664impl PartialEq for Coercion {
665    fn eq(&self, other: &Self) -> bool {
666        self.desired_type() == other.desired_type()
667            && self.implicit_coercion() == other.implicit_coercion()
668    }
669}
670
671impl Hash for Coercion {
672    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
673        self.desired_type().hash(state);
674        self.implicit_coercion().hash(state);
675    }
676}
677
/// Defines rules for implicit type coercion, specifying which source types can be
/// coerced and the default type to use when coercing.
///
/// This is used by functions to specify which types they can accept via implicit
/// coercion in addition to their primary desired type.
///
/// Both fields are private; values are created through [`Coercion::new_implicit`].
///
/// # Examples
///
/// ```
/// use arrow::datatypes::TimeUnit;
///
/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary};
///
/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Timestamp,
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::Timestamp(TimeUnit::Second, None),
/// );
/// ```
#[derive(Debug, Clone, Eq, PartialOrd)]
pub struct ImplicitCoercion {
    /// The source types from which implicit casting is allowed
    allowed_source_types: Vec<TypeSignatureClass>,

    /// The default type to use when coercing from allowed source types.
    /// This is particularly important for types like Timestamp that have multiple
    /// possible configurations (different time units and timezones).
    default_casted_type: NativeType,
}
709
710impl Display for ImplicitCoercion {
711    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
712        write!(
713            f,
714            "ImplicitCoercion({:?}, default_type={:?})",
715            self.allowed_source_types, self.default_casted_type
716        )
717    }
718}
719
720impl PartialEq for ImplicitCoercion {
721    fn eq(&self, other: &Self) -> bool {
722        self.allowed_source_types == other.allowed_source_types
723            && self.default_casted_type == other.default_casted_type
724    }
725}
726
727impl Hash for ImplicitCoercion {
728    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
729        self.allowed_source_types.hash(state);
730        self.default_casted_type.hash(state);
731    }
732}
733
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
///
/// DataFusion will automatically coerce (cast) argument types to one of the supported
/// function signatures, if possible.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Signature {
    /// The data types that the function accepts. See [`TypeSignature`] for more information.
    pub type_signature: TypeSignature,
    /// The volatility of the function. See [`Volatility`] for more information.
    pub volatility: Volatility,
}
745
746impl Signature {
747    /// Creates a new Signature from a given type signature and volatility.
748    pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
749        Signature {
750            type_signature,
751            volatility,
752        }
753    }
754    /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
755    pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
756        Self {
757            type_signature: TypeSignature::Variadic(common_types),
758            volatility,
759        }
760    }
761    /// User-defined coercion rules for the function.
762    pub fn user_defined(volatility: Volatility) -> Self {
763        Self {
764            type_signature: TypeSignature::UserDefined,
765            volatility,
766        }
767    }
768
769    /// A specified number of numeric arguments
770    pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
771        Self {
772            type_signature: TypeSignature::Numeric(arg_count),
773            volatility,
774        }
775    }
776
777    /// A specified number of numeric arguments
778    pub fn string(arg_count: usize, volatility: Volatility) -> Self {
779        Self {
780            type_signature: TypeSignature::String(arg_count),
781            volatility,
782        }
783    }
784
785    /// An arbitrary number of arguments of any type.
786    pub fn variadic_any(volatility: Volatility) -> Self {
787        Self {
788            type_signature: TypeSignature::VariadicAny,
789            volatility,
790        }
791    }
792    /// A fixed number of arguments of the same type, from those listed in `valid_types`.
793    pub fn uniform(
794        arg_count: usize,
795        valid_types: Vec<DataType>,
796        volatility: Volatility,
797    ) -> Self {
798        Self {
799            type_signature: TypeSignature::Uniform(arg_count, valid_types),
800            volatility,
801        }
802    }
803    /// Exactly matches the types in `exact_types`, in order.
804    pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
805        Signature {
806            type_signature: TypeSignature::Exact(exact_types),
807            volatility,
808        }
809    }
810
811    /// Target coerce types in order
812    pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
813        Self {
814            type_signature: TypeSignature::Coercible(target_types),
815            volatility,
816        }
817    }
818
819    /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
820    pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
821        Self {
822            type_signature: TypeSignature::Comparable(arg_count),
823            volatility,
824        }
825    }
826
827    pub fn nullary(volatility: Volatility) -> Self {
828        Signature {
829            type_signature: TypeSignature::Nullary,
830            volatility,
831        }
832    }
833
834    /// A specified number of arguments of any type
835    pub fn any(arg_count: usize, volatility: Volatility) -> Self {
836        Signature {
837            type_signature: TypeSignature::Any(arg_count),
838            volatility,
839        }
840    }
841    /// Any one of a list of [TypeSignature]s.
842    pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
843        Signature {
844            type_signature: TypeSignature::OneOf(type_signatures),
845            volatility,
846        }
847    }
848    /// Specialized Signature for ArrayAppend and similar functions
849    pub fn array_and_element(volatility: Volatility) -> Self {
850        Signature {
851            type_signature: TypeSignature::ArraySignature(
852                ArrayFunctionSignature::Array {
853                    arguments: vec![
854                        ArrayFunctionArgument::Array,
855                        ArrayFunctionArgument::Element,
856                    ],
857                    array_coercion: Some(ListCoercion::FixedSizedListToList),
858                },
859            ),
860            volatility,
861        }
862    }
863    /// Specialized Signature for Array functions with an optional index
864    pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
865        Signature {
866            type_signature: TypeSignature::OneOf(vec![
867                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
868                    arguments: vec![
869                        ArrayFunctionArgument::Array,
870                        ArrayFunctionArgument::Element,
871                    ],
872                    array_coercion: None,
873                }),
874                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
875                    arguments: vec![
876                        ArrayFunctionArgument::Array,
877                        ArrayFunctionArgument::Element,
878                        ArrayFunctionArgument::Index,
879                    ],
880                    array_coercion: None,
881                }),
882            ]),
883            volatility,
884        }
885    }
886
887    /// Specialized Signature for ArrayElement and similar functions
888    pub fn array_and_index(volatility: Volatility) -> Self {
889        Signature {
890            type_signature: TypeSignature::ArraySignature(
891                ArrayFunctionSignature::Array {
892                    arguments: vec![
893                        ArrayFunctionArgument::Array,
894                        ArrayFunctionArgument::Index,
895                    ],
896                    array_coercion: None,
897                },
898            ),
899            volatility,
900        }
901    }
902    /// Specialized Signature for ArrayEmpty and similar functions
903    pub fn array(volatility: Volatility) -> Self {
904        Signature {
905            type_signature: TypeSignature::ArraySignature(
906                ArrayFunctionSignature::Array {
907                    arguments: vec![ArrayFunctionArgument::Array],
908                    array_coercion: None,
909                },
910            ),
911            volatility,
912        }
913    }
914}
915
#[cfg(test)]
mod tests {
    use datafusion_common::types::{logical_int64, logical_string};

    use super::*;

    /// Asserts that `smaller` orders strictly before `larger`.
    #[track_caller]
    fn assert_strictly_less(smaller: TypeSignature, larger: TypeSignature) {
        assert!(smaller < larger);
    }

    /// Asserts that `signature.get_example_types()` yields exactly `expected`.
    #[track_caller]
    fn assert_example_types(signature: TypeSignature, expected: Vec<Vec<DataType>>) {
        assert_eq!(signature.get_example_types(), expected);
    }

    /// Builds one row per entry of `types`, each row repeating that type `arity` times.
    fn repeated_rows(types: &[DataType], arity: usize) -> Vec<Vec<DataType>> {
        types.iter().map(|ty| vec![ty.clone(); arity]).collect()
    }

    #[test]
    fn supports_zero_argument_tests() {
        // `TypeSignature`s that can be called with no arguments.
        let accepts_zero_args = [
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        accepts_zero_args.iter().for_each(|signature| {
            assert!(
                signature.supports_zero_argument(),
                "Expected {:?} to support zero arguments",
                signature
            );
        });

        // `TypeSignature`s that always require at least one argument.
        let rejects_zero_args = [
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        rejects_zero_args.iter().for_each(|signature| {
            assert!(
                !signature.supports_zero_argument(),
                "Expected {:?} not to support zero arguments",
                signature
            );
        });
    }

    #[test]
    fn type_signature_partial_ord() {
        // Validates that a partial order is defined for `TypeSignature`.

        // Different variants.
        assert_strictly_less(TypeSignature::UserDefined, TypeSignature::VariadicAny);
        assert_strictly_less(TypeSignature::UserDefined, TypeSignature::Any(1));

        // Same variant: the type list breaks the tie when the counts match ...
        assert_strictly_less(
            TypeSignature::Uniform(1, vec![DataType::Null]),
            TypeSignature::Uniform(1, vec![DataType::Boolean]),
        );
        // ... and a smaller count sorts first when the type lists match.
        assert_strictly_less(
            TypeSignature::Uniform(1, vec![DataType::Null]),
            TypeSignature::Uniform(2, vec![DataType::Null]),
        );

        // A `Uniform` sorts before an `Exact` even with the largest possible count.
        assert_strictly_less(
            TypeSignature::Uniform(usize::MAX, vec![DataType::Null]),
            TypeSignature::Exact(vec![DataType::Null]),
        );
    }

    #[test]
    fn test_get_possible_types() {
        let ints = || vec![DataType::Int32, DataType::Int64];
        let floats = || vec![DataType::Float32, DataType::Float64];
        let string_types = [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View];

        // `Exact` yields its own type list as the only example.
        assert_example_types(TypeSignature::Exact(ints()), vec![ints()]);

        // `OneOf` concatenates the examples of its alternatives, in order.
        assert_example_types(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(ints()),
                TypeSignature::Exact(floats()),
            ]),
            vec![ints(), floats()],
        );
        assert_example_types(
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(ints()),
                TypeSignature::Exact(floats()),
                TypeSignature::Exact(vec![DataType::Utf8]),
            ]),
            vec![ints(), floats(), vec![DataType::Utf8]],
        );

        // `Uniform` repeats each valid type `arg_count` times.
        assert_example_types(
            TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]),
            repeated_rows(&[DataType::Float32, DataType::Int64], 2),
        );

        // `Coercible` expands every string type for the first argument.
        assert_example_types(
            TypeSignature::Coercible(vec![
                Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
                Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
            ]),
            string_types
                .iter()
                .map(|string_type| vec![string_type.clone(), DataType::Int64])
                .collect(),
        );

        // `Variadic` yields one single-argument example per common type.
        assert_example_types(
            TypeSignature::Variadic(ints()),
            repeated_rows(&[DataType::Int32, DataType::Int64], 1),
        );

        // `Numeric` yields one row per numeric type, repeated `arg_count` times.
        assert_example_types(
            TypeSignature::Numeric(2),
            repeated_rows(
                &[
                    DataType::Int8,
                    DataType::Int16,
                    DataType::Int32,
                    DataType::Int64,
                    DataType::UInt8,
                    DataType::UInt16,
                    DataType::UInt32,
                    DataType::UInt64,
                    DataType::Float32,
                    DataType::Float64,
                ],
                2,
            ),
        );

        // `String` yields one row per string type, repeated `arg_count` times.
        assert_example_types(TypeSignature::String(2), repeated_rows(&string_types, 2));
    }
}