datafusion_expr_common/signature.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Signature module contains foundational types that are used to represent signatures, types,
19//! and return types of functions in DataFusion.
20
21use std::fmt::Display;
22use std::hash::Hash;
23
24use crate::type_coercion::aggregates::NUMERICS;
25use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
26use datafusion_common::internal_err;
27use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
28use datafusion_common::utils::ListCoercion;
29use indexmap::IndexSet;
30use itertools::Itertools;
31
/// Constant that is used as a placeholder for any valid timezone.
///
/// This is used where a function can accept a timestamp type with any
/// valid timezone; it exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
41
/// Constant that is used as a placeholder for any valid fixed size list length.
///
/// This is used where a function can accept a fixed size list type with any
/// valid length; it exists to avoid the need to enumerate all possible fixed
/// size list lengths.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
46
/// A function's volatility, which defines the function's eligibility for certain optimizations.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// An immutable function will always return the same output when given the same
    /// input. DataFusion will attempt to inline immutable functions during planning.
    Immutable,
    /// A stable function may return different values given the same input across different
    /// queries but must return the same value for a given input within a query. An example of
    /// this is the `Now` function. DataFusion will attempt to inline `Stable` functions
    /// during planning, when possible.
    ///
    /// For the query `select col1, now() from t1`, execution might take a while, but the
    /// `now()` column will be the same for each output row because it is evaluated
    /// during planning.
    Stable,
    /// A volatile function may change the return value from evaluation to evaluation.
    /// Multiple invocations of a volatile function may return different results when used in the
    /// same query. An example of this is the `random()` function. DataFusion
    /// cannot evaluate such functions during planning.
    ///
    /// In the query `select col1, random() from t1`, the `random()` function will be evaluated
    /// for each output row, resulting in a unique random value for each row.
    Volatile,
}
69
/// A function's type signature defines the types of arguments the function supports.
///
/// Functions typically support only a few different types of arguments compared to the
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
/// automatically coerces (adds casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// # Data Types
///
/// ## Timestamps
///
/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
/// the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
///     // A nanosecond precision timestamp with ANY timezone
///     // matches Timestamp(Nanosecond, Some("+0:00"))
///     // matches Timestamp(Nanosecond, Some("+5:00"))
///     // does not match Timestamp(Nanosecond, None)
///     DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
    /// One or more arguments of a common type out of a list of valid types.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    ///
    /// # Examples
    ///
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
    /// DataType::LargeUtf8])`
    Variadic(Vec<DataType>),
    /// The acceptable signature and coercion rules are special for this
    /// function.
    ///
    /// If this signature is specified,
    /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
    ///
    /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
    UserDefined,
    /// One or more arguments with arbitrary types
    VariadicAny,
    /// A fixed number of arguments of an arbitrary but equal type out of a list of valid types.
    ///
    /// # Examples
    ///
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
    Uniform(usize, Vec<DataType>),
    /// One or more arguments with exactly the specified types in order.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Exact(Vec<DataType>),
    /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
    ///
    /// [`Coercion`] contains not only the desired type but also the allowed casts.
    /// For example, a function may expect a string type but also allow the argument
    /// to be cast from a binary type.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    Coercible(Vec<Coercion>),
    /// One or more arguments coercible to a single, comparable type.
    ///
    /// Each argument will be coerced to a single type using the
    /// coercion rules described in [`comparison_coercion_numeric`].
    ///
    /// # Examples
    ///
    /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
    /// the types will both be coerced to `i64` before the function is invoked.
    ///
    /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
    /// the types will both be coerced to `Utf8` before the function is invoked.
    ///
    /// Note:
    /// - For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
    ///
    /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
    Comparable(usize),
    /// The specified number of arguments of arbitrary types.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Any(usize),
    /// Matches exactly one of a list of [`TypeSignature`]s.
    ///
    /// Coercion is attempted to match the signatures in order, and stops after
    /// the first success, if any.
    ///
    /// # Examples
    ///
    /// A function such as `make_array`, which takes 0 or more arguments with arbitrary
    /// types, can be described as `OneOf(vec![Nullary, VariadicAny])`.
    OneOf(Vec<TypeSignature>),
    /// A function that has an [`ArrayFunctionSignature`]
    ArraySignature(ArrayFunctionSignature),
    /// The specified number of arguments of numeric types.
    ///
    /// See [`NativeType::is_numeric`] to know which type is considered numeric
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    ///
    /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
    Numeric(usize),
    /// The specified number of arguments, all of the same string type.
    ///
    /// The precedence of types from high to low is Utf8View, LargeUtf8 and Utf8.
    /// Null is considered as `Utf8` by default.
    /// Dictionary with string value type is also handled.
    ///
    /// For example, if a function is called with (utf8, large_utf8), all
    /// arguments will be coerced to `LargeUtf8`
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    String(usize),
    /// No arguments
    Nullary,
}
195
196impl TypeSignature {
197 #[inline]
198 pub fn is_one_of(&self) -> bool {
199 matches!(self, TypeSignature::OneOf(_))
200 }
201}
202
/// Represents the class of types that can be used in a function signature.
///
/// This is used to specify what types are valid for function arguments in a more flexible way than
/// just listing specific DataTypes. For example, `TypeSignatureClass::Timestamp` matches any timestamp
/// type regardless of timezone or precision.
///
/// Used primarily with `TypeSignature::Coercible` to define function signatures that can accept
/// arguments that can be coerced to a particular class of types.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
    /// Matches native types for which `NativeType::is_timestamp` returns true.
    Timestamp,
    /// Matches native types for which `NativeType::is_time` returns true.
    Time,
    /// Matches native types for which `NativeType::is_interval` returns true.
    Interval,
    /// Matches native types for which `NativeType::is_duration` returns true.
    Duration,
    /// Matches the native type of the wrapped logical type.
    Native(LogicalTypeRef),
    // TODO:
    // Numeric
    /// Matches native types for which `NativeType::is_integer` returns true.
    Integer,
}
222
223impl Display for TypeSignatureClass {
224 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
225 write!(f, "TypeSignatureClass::{self:?}")
226 }
227}
228
229impl TypeSignatureClass {
230 /// Get example acceptable types for this `TypeSignatureClass`
231 ///
232 /// This is used for `information_schema` and can be used to generate
233 /// documentation or error messages.
234 fn get_example_types(&self) -> Vec<DataType> {
235 match self {
236 TypeSignatureClass::Native(l) => get_data_types(l.native()),
237 TypeSignatureClass::Timestamp => {
238 vec![
239 DataType::Timestamp(TimeUnit::Nanosecond, None),
240 DataType::Timestamp(
241 TimeUnit::Nanosecond,
242 Some(TIMEZONE_WILDCARD.into()),
243 ),
244 ]
245 }
246 TypeSignatureClass::Time => {
247 vec![DataType::Time64(TimeUnit::Nanosecond)]
248 }
249 TypeSignatureClass::Interval => {
250 vec![DataType::Interval(IntervalUnit::DayTime)]
251 }
252 TypeSignatureClass::Duration => {
253 vec![DataType::Duration(TimeUnit::Nanosecond)]
254 }
255 TypeSignatureClass::Integer => {
256 vec![DataType::Int64]
257 }
258 }
259 }
260
261 /// Does the specified `NativeType` match this type signature class?
262 pub fn matches_native_type(
263 self: &TypeSignatureClass,
264 logical_type: &NativeType,
265 ) -> bool {
266 if logical_type == &NativeType::Null {
267 return true;
268 }
269
270 match self {
271 TypeSignatureClass::Native(t) if t.native() == logical_type => true,
272 TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
273 TypeSignatureClass::Time if logical_type.is_time() => true,
274 TypeSignatureClass::Interval if logical_type.is_interval() => true,
275 TypeSignatureClass::Duration if logical_type.is_duration() => true,
276 TypeSignatureClass::Integer if logical_type.is_integer() => true,
277 _ => false,
278 }
279 }
280
281 /// What type would `origin_type` be casted to when casting to the specified native type?
282 pub fn default_casted_type(
283 &self,
284 native_type: &NativeType,
285 origin_type: &DataType,
286 ) -> datafusion_common::Result<DataType> {
287 match self {
288 TypeSignatureClass::Native(logical_type) => {
289 logical_type.native().default_cast_for(origin_type)
290 }
291 // If the given type is already a timestamp, we don't change the unit and timezone
292 TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
293 Ok(origin_type.to_owned())
294 }
295 TypeSignatureClass::Time if native_type.is_time() => {
296 Ok(origin_type.to_owned())
297 }
298 TypeSignatureClass::Interval if native_type.is_interval() => {
299 Ok(origin_type.to_owned())
300 }
301 TypeSignatureClass::Duration if native_type.is_duration() => {
302 Ok(origin_type.to_owned())
303 }
304 TypeSignatureClass::Integer if native_type.is_integer() => {
305 Ok(origin_type.to_owned())
306 }
307 _ => internal_err!("May miss the matching logic in `matches_native_type`"),
308 }
309 }
310}
311
/// Signatures for functions that take list-like (List/LargeList/FixedSizeList) or map arguments.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
    /// A function takes at least one List/LargeList/FixedSizeList argument.
    Array {
        /// A full list of the arguments accepted by this function.
        arguments: Vec<ArrayFunctionArgument>,
        /// Additional information about how array arguments should be coerced.
        array_coercion: Option<ListCoercion>,
    },
    /// A function takes a single argument that must be a List/LargeList/FixedSizeList
    /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
    RecursiveArray,
    /// Specialized Signature for MapArray.
    /// The function takes a single argument that must be a MapArray.
    MapArray,
}
328
329impl Display for ArrayFunctionSignature {
330 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331 match self {
332 ArrayFunctionSignature::Array { arguments, .. } => {
333 for (idx, argument) in arguments.iter().enumerate() {
334 write!(f, "{argument}")?;
335 if idx != arguments.len() - 1 {
336 write!(f, ", ")?;
337 }
338 }
339 Ok(())
340 }
341 ArrayFunctionSignature::RecursiveArray => {
342 write!(f, "recursive_array")
343 }
344 ArrayFunctionSignature::MapArray => {
345 write!(f, "map_array")
346 }
347 }
348 }
349}
350
/// The kind of a single argument in an [`ArrayFunctionSignature::Array`] signature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the Array's list
    /// dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
    /// to the same type.
    Array,
}
362
363impl Display for ArrayFunctionArgument {
364 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
365 match self {
366 ArrayFunctionArgument::Element => {
367 write!(f, "element")
368 }
369 ArrayFunctionArgument::Index => {
370 write!(f, "index")
371 }
372 ArrayFunctionArgument::Array => {
373 write!(f, "array")
374 }
375 }
376 }
377}
378
379impl TypeSignature {
380 pub fn to_string_repr(&self) -> Vec<String> {
381 match self {
382 TypeSignature::Nullary => {
383 vec!["NullAry()".to_string()]
384 }
385 TypeSignature::Variadic(types) => {
386 vec![format!("{}, ..", Self::join_types(types, "/"))]
387 }
388 TypeSignature::Uniform(arg_count, valid_types) => {
389 vec![std::iter::repeat(Self::join_types(valid_types, "/"))
390 .take(*arg_count)
391 .collect::<Vec<String>>()
392 .join(", ")]
393 }
394 TypeSignature::String(num) => {
395 vec![format!("String({num})")]
396 }
397 TypeSignature::Numeric(num) => {
398 vec![format!("Numeric({num})")]
399 }
400 TypeSignature::Comparable(num) => {
401 vec![format!("Comparable({num})")]
402 }
403 TypeSignature::Coercible(coercions) => {
404 vec![Self::join_types(coercions, ", ")]
405 }
406 TypeSignature::Exact(types) => {
407 vec![Self::join_types(types, ", ")]
408 }
409 TypeSignature::Any(arg_count) => {
410 vec![std::iter::repeat("Any")
411 .take(*arg_count)
412 .collect::<Vec<&str>>()
413 .join(", ")]
414 }
415 TypeSignature::UserDefined => {
416 vec!["UserDefined".to_string()]
417 }
418 TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
419 TypeSignature::OneOf(sigs) => {
420 sigs.iter().flat_map(|s| s.to_string_repr()).collect()
421 }
422 TypeSignature::ArraySignature(array_signature) => {
423 vec![array_signature.to_string()]
424 }
425 }
426 }
427
428 /// Helper function to join types with specified delimiter.
429 pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
430 types
431 .iter()
432 .map(|t| t.to_string())
433 .collect::<Vec<String>>()
434 .join(delimiter)
435 }
436
437 /// Check whether 0 input argument is valid for given `TypeSignature`
438 pub fn supports_zero_argument(&self) -> bool {
439 match &self {
440 TypeSignature::Exact(vec) => vec.is_empty(),
441 TypeSignature::Nullary => true,
442 TypeSignature::OneOf(types) => types
443 .iter()
444 .any(|type_sig| type_sig.supports_zero_argument()),
445 _ => false,
446 }
447 }
448
449 /// Returns true if the signature currently supports or used to supported 0
450 /// input arguments in a previous version of DataFusion.
451 pub fn used_to_support_zero_arguments(&self) -> bool {
452 match &self {
453 TypeSignature::Any(num) => *num == 0,
454 _ => self.supports_zero_argument(),
455 }
456 }
457
458 #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
459 pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
460 self.get_example_types()
461 }
462
463 /// Return example acceptable types for this `TypeSignature`'
464 ///
465 /// Returns a `Vec<DataType>` for each argument to the function
466 ///
467 /// This is used for `information_schema` and can be used to generate
468 /// documentation or error messages.
469 pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
470 match self {
471 TypeSignature::Exact(types) => vec![types.clone()],
472 TypeSignature::OneOf(types) => types
473 .iter()
474 .flat_map(|type_sig| type_sig.get_example_types())
475 .collect(),
476 TypeSignature::Uniform(arg_count, types) => types
477 .iter()
478 .cloned()
479 .map(|data_type| vec![data_type; *arg_count])
480 .collect(),
481 TypeSignature::Coercible(coercions) => coercions
482 .iter()
483 .map(|c| {
484 let mut all_types: IndexSet<DataType> =
485 c.desired_type().get_example_types().into_iter().collect();
486
487 if let Some(implicit_coercion) = c.implicit_coercion() {
488 let allowed_casts: Vec<DataType> = implicit_coercion
489 .allowed_source_types
490 .iter()
491 .flat_map(|t| t.get_example_types())
492 .collect();
493 all_types.extend(allowed_casts);
494 }
495
496 all_types.into_iter().collect::<Vec<_>>()
497 })
498 .multi_cartesian_product()
499 .collect(),
500 TypeSignature::Variadic(types) => types
501 .iter()
502 .cloned()
503 .map(|data_type| vec![data_type])
504 .collect(),
505 TypeSignature::Numeric(arg_count) => NUMERICS
506 .iter()
507 .cloned()
508 .map(|numeric_type| vec![numeric_type; *arg_count])
509 .collect(),
510 TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
511 .into_iter()
512 .map(|dt| vec![dt; *arg_count])
513 .collect::<Vec<_>>(),
514 // TODO: Implement for other types
515 TypeSignature::Any(_)
516 | TypeSignature::Comparable(_)
517 | TypeSignature::Nullary
518 | TypeSignature::VariadicAny
519 | TypeSignature::ArraySignature(_)
520 | TypeSignature::UserDefined => vec![],
521 }
522 }
523}
524
525fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
526 match native_type {
527 NativeType::Null => vec![DataType::Null],
528 NativeType::Boolean => vec![DataType::Boolean],
529 NativeType::Int8 => vec![DataType::Int8],
530 NativeType::Int16 => vec![DataType::Int16],
531 NativeType::Int32 => vec![DataType::Int32],
532 NativeType::Int64 => vec![DataType::Int64],
533 NativeType::UInt8 => vec![DataType::UInt8],
534 NativeType::UInt16 => vec![DataType::UInt16],
535 NativeType::UInt32 => vec![DataType::UInt32],
536 NativeType::UInt64 => vec![DataType::UInt64],
537 NativeType::Float16 => vec![DataType::Float16],
538 NativeType::Float32 => vec![DataType::Float32],
539 NativeType::Float64 => vec![DataType::Float64],
540 NativeType::Date => vec![DataType::Date32, DataType::Date64],
541 NativeType::Binary => vec![
542 DataType::Binary,
543 DataType::LargeBinary,
544 DataType::BinaryView,
545 ],
546 NativeType::String => {
547 vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
548 }
549 // TODO: support other native types
550 _ => vec![],
551 }
552}
553
/// Represents type coercion rules for function arguments, specifying both the desired type
/// and optional implicit coercion rules for source types.
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
///
/// // Exact coercion that only accepts timestamp types
/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
///
/// // Implicit coercion that accepts string types but can coerce from binary types
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Native(logical_string()),
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::String
/// );
/// ```
///
/// There are two variants:
///
/// * `Exact` - Only accepts arguments that exactly match the desired type
/// * `Implicit` - Accepts the desired type and can coerce from specified source types
#[derive(Debug, Clone, Eq, PartialOrd)]
pub enum Coercion {
    /// Coercion that only accepts arguments exactly matching the desired type.
    Exact {
        /// The required type for the argument
        desired_type: TypeSignatureClass,
    },

    /// Coercion that accepts the desired type and can implicitly coerce from other types.
    Implicit {
        /// The primary desired type for the argument
        desired_type: TypeSignatureClass,
        /// Rules for implicit coercion from other types
        implicit_coercion: ImplicitCoercion,
    },
}
594
595impl Coercion {
596 pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
597 Self::Exact { desired_type }
598 }
599
600 /// Create a new coercion with implicit coercion rules.
601 ///
602 /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
603 /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
604 pub fn new_implicit(
605 desired_type: TypeSignatureClass,
606 allowed_source_types: Vec<TypeSignatureClass>,
607 default_casted_type: NativeType,
608 ) -> Self {
609 Self::Implicit {
610 desired_type,
611 implicit_coercion: ImplicitCoercion {
612 allowed_source_types,
613 default_casted_type,
614 },
615 }
616 }
617
618 pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
619 match self {
620 Coercion::Exact { .. } => &[],
621 Coercion::Implicit {
622 implicit_coercion, ..
623 } => implicit_coercion.allowed_source_types.as_slice(),
624 }
625 }
626
627 pub fn default_casted_type(&self) -> Option<&NativeType> {
628 match self {
629 Coercion::Exact { .. } => None,
630 Coercion::Implicit {
631 implicit_coercion, ..
632 } => Some(&implicit_coercion.default_casted_type),
633 }
634 }
635
636 pub fn desired_type(&self) -> &TypeSignatureClass {
637 match self {
638 Coercion::Exact { desired_type } => desired_type,
639 Coercion::Implicit { desired_type, .. } => desired_type,
640 }
641 }
642
643 pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
644 match self {
645 Coercion::Exact { .. } => None,
646 Coercion::Implicit {
647 implicit_coercion, ..
648 } => Some(implicit_coercion),
649 }
650 }
651}
652
653impl Display for Coercion {
654 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
655 write!(f, "Coercion({}", self.desired_type())?;
656 if let Some(implicit_coercion) = self.implicit_coercion() {
657 write!(f, ", implicit_coercion={implicit_coercion}",)
658 } else {
659 write!(f, ")")
660 }
661 }
662}
663
664impl PartialEq for Coercion {
665 fn eq(&self, other: &Self) -> bool {
666 self.desired_type() == other.desired_type()
667 && self.implicit_coercion() == other.implicit_coercion()
668 }
669}
670
671impl Hash for Coercion {
672 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
673 self.desired_type().hash(state);
674 self.implicit_coercion().hash(state);
675 }
676}
677
/// Defines rules for implicit type coercion, specifying which source types can be
/// coerced and the default type to use when coercing.
///
/// This is used by functions to specify which types they can accept via implicit
/// coercion in addition to their primary desired type.
///
/// # Examples
///
/// ```
/// use arrow::datatypes::TimeUnit;
///
/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary};
///
/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Timestamp,
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::Timestamp(TimeUnit::Second, None),
/// );
/// ```
#[derive(Debug, Clone, Eq, PartialOrd)]
pub struct ImplicitCoercion {
    /// The types that can be coerced from via implicit casting
    allowed_source_types: Vec<TypeSignatureClass>,

    /// The default type to use when coercing from allowed source types.
    /// This is particularly important for types like Timestamp that have multiple
    /// possible configurations (different time units and timezones).
    default_casted_type: NativeType,
}
709
710impl Display for ImplicitCoercion {
711 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
712 write!(
713 f,
714 "ImplicitCoercion({:?}, default_type={:?})",
715 self.allowed_source_types, self.default_casted_type
716 )
717 }
718}
719
720impl PartialEq for ImplicitCoercion {
721 fn eq(&self, other: &Self) -> bool {
722 self.allowed_source_types == other.allowed_source_types
723 && self.default_casted_type == other.default_casted_type
724 }
725}
726
727impl Hash for ImplicitCoercion {
728 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
729 self.allowed_source_types.hash(state);
730 self.default_casted_type.hash(state);
731 }
732}
733
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
///
/// DataFusion will automatically coerce (cast) argument types to one of the supported
/// function signatures, if possible.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Signature {
    /// The data types that the function accepts. See [`TypeSignature`] for more information.
    pub type_signature: TypeSignature,
    /// The volatility of the function. See [`Volatility`] for more information.
    pub volatility: Volatility,
}
745
746impl Signature {
747 /// Creates a new Signature from a given type signature and volatility.
748 pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
749 Signature {
750 type_signature,
751 volatility,
752 }
753 }
754 /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
755 pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
756 Self {
757 type_signature: TypeSignature::Variadic(common_types),
758 volatility,
759 }
760 }
761 /// User-defined coercion rules for the function.
762 pub fn user_defined(volatility: Volatility) -> Self {
763 Self {
764 type_signature: TypeSignature::UserDefined,
765 volatility,
766 }
767 }
768
769 /// A specified number of numeric arguments
770 pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
771 Self {
772 type_signature: TypeSignature::Numeric(arg_count),
773 volatility,
774 }
775 }
776
777 /// A specified number of numeric arguments
778 pub fn string(arg_count: usize, volatility: Volatility) -> Self {
779 Self {
780 type_signature: TypeSignature::String(arg_count),
781 volatility,
782 }
783 }
784
785 /// An arbitrary number of arguments of any type.
786 pub fn variadic_any(volatility: Volatility) -> Self {
787 Self {
788 type_signature: TypeSignature::VariadicAny,
789 volatility,
790 }
791 }
792 /// A fixed number of arguments of the same type, from those listed in `valid_types`.
793 pub fn uniform(
794 arg_count: usize,
795 valid_types: Vec<DataType>,
796 volatility: Volatility,
797 ) -> Self {
798 Self {
799 type_signature: TypeSignature::Uniform(arg_count, valid_types),
800 volatility,
801 }
802 }
803 /// Exactly matches the types in `exact_types`, in order.
804 pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
805 Signature {
806 type_signature: TypeSignature::Exact(exact_types),
807 volatility,
808 }
809 }
810
811 /// Target coerce types in order
812 pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
813 Self {
814 type_signature: TypeSignature::Coercible(target_types),
815 volatility,
816 }
817 }
818
819 /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
820 pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
821 Self {
822 type_signature: TypeSignature::Comparable(arg_count),
823 volatility,
824 }
825 }
826
827 pub fn nullary(volatility: Volatility) -> Self {
828 Signature {
829 type_signature: TypeSignature::Nullary,
830 volatility,
831 }
832 }
833
834 /// A specified number of arguments of any type
835 pub fn any(arg_count: usize, volatility: Volatility) -> Self {
836 Signature {
837 type_signature: TypeSignature::Any(arg_count),
838 volatility,
839 }
840 }
841 /// Any one of a list of [TypeSignature]s.
842 pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
843 Signature {
844 type_signature: TypeSignature::OneOf(type_signatures),
845 volatility,
846 }
847 }
848 /// Specialized Signature for ArrayAppend and similar functions
849 pub fn array_and_element(volatility: Volatility) -> Self {
850 Signature {
851 type_signature: TypeSignature::ArraySignature(
852 ArrayFunctionSignature::Array {
853 arguments: vec![
854 ArrayFunctionArgument::Array,
855 ArrayFunctionArgument::Element,
856 ],
857 array_coercion: Some(ListCoercion::FixedSizedListToList),
858 },
859 ),
860 volatility,
861 }
862 }
863 /// Specialized Signature for Array functions with an optional index
864 pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
865 Signature {
866 type_signature: TypeSignature::OneOf(vec![
867 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
868 arguments: vec![
869 ArrayFunctionArgument::Array,
870 ArrayFunctionArgument::Element,
871 ],
872 array_coercion: None,
873 }),
874 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
875 arguments: vec![
876 ArrayFunctionArgument::Array,
877 ArrayFunctionArgument::Element,
878 ArrayFunctionArgument::Index,
879 ],
880 array_coercion: None,
881 }),
882 ]),
883 volatility,
884 }
885 }
886
887 /// Specialized Signature for ArrayElement and similar functions
888 pub fn array_and_index(volatility: Volatility) -> Self {
889 Signature {
890 type_signature: TypeSignature::ArraySignature(
891 ArrayFunctionSignature::Array {
892 arguments: vec![
893 ArrayFunctionArgument::Array,
894 ArrayFunctionArgument::Index,
895 ],
896 array_coercion: None,
897 },
898 ),
899 volatility,
900 }
901 }
902 /// Specialized Signature for ArrayEmpty and similar functions
903 pub fn array(volatility: Volatility) -> Self {
904 Signature {
905 type_signature: TypeSignature::ArraySignature(
906 ArrayFunctionSignature::Array {
907 arguments: vec![ArrayFunctionArgument::Array],
908 array_coercion: None,
909 },
910 ),
911 volatility,
912 }
913 }
914}
915
#[cfg(test)]
mod tests {
    use datafusion_common::types::{logical_int64, logical_string};

    use super::*;

    /// Checks `supports_zero_argument` against signatures expected to accept
    /// an empty argument list and signatures expected to reject it.
    #[test]
    fn supports_zero_argument_tests() {
        // `TypeSignature`s which are expected to support 0 args.
        let accepting = [
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        accepting.iter().for_each(|signature| {
            assert!(
                signature.supports_zero_argument(),
                "Expected {:?} to support zero arguments",
                signature
            );
        });

        // `TypeSignature`s which are expected not to support 0 args.
        let rejecting = [
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        rejecting.iter().for_each(|signature| {
            assert!(
                !signature.supports_zero_argument(),
                "Expected {:?} not to support zero arguments",
                signature
            );
        });
    }

    /// Validates that a partial order is defined for `TypeSignature`.
    #[test]
    fn type_signature_partial_ord() {
        let user_defined = TypeSignature::UserDefined;
        assert!(user_defined < TypeSignature::VariadicAny);
        assert!(user_defined < TypeSignature::Any(1));

        let uniform_one_null = TypeSignature::Uniform(1, vec![DataType::Null]);
        assert!(uniform_one_null < TypeSignature::Uniform(1, vec![DataType::Boolean]));
        assert!(uniform_one_null < TypeSignature::Uniform(2, vec![DataType::Null]));

        let uniform_max_null = TypeSignature::Uniform(usize::MAX, vec![DataType::Null]);
        assert!(uniform_max_null < TypeSignature::Exact(vec![DataType::Null]));
    }

    /// Exercises `get_example_types` across several `TypeSignature` variants.
    #[test]
    fn test_get_possible_types() {
        // Shared building blocks for the expected values below.
        let int_pair = vec![DataType::Int32, DataType::Int64];
        let float_pair = vec![DataType::Float32, DataType::Float64];
        let string_types = [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View];

        // Exact: the single listed combination.
        let signature = TypeSignature::Exact(int_pair.clone());
        assert_eq!(signature.get_example_types(), vec![int_pair.clone()]);

        // OneOf with two exact alternatives.
        let signature = TypeSignature::OneOf(vec![
            TypeSignature::Exact(int_pair.clone()),
            TypeSignature::Exact(float_pair.clone()),
        ]);
        assert_eq!(
            signature.get_example_types(),
            vec![int_pair.clone(), float_pair.clone()]
        );

        // OneOf with three exact alternatives of differing arity.
        let signature = TypeSignature::OneOf(vec![
            TypeSignature::Exact(int_pair.clone()),
            TypeSignature::Exact(float_pair.clone()),
            TypeSignature::Exact(vec![DataType::Utf8]),
        ]);
        assert_eq!(
            signature.get_example_types(),
            vec![int_pair, float_pair, vec![DataType::Utf8]]
        );

        // Uniform: each valid type repeated for every argument position.
        let signature =
            TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]);
        assert_eq!(
            signature.get_example_types(),
            vec![vec![DataType::Float32; 2], vec![DataType::Int64; 2]]
        );

        // Coercible: string-class first argument paired with Int64.
        let signature = TypeSignature::Coercible(vec![
            Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
            Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
        ]);
        let expected: Vec<Vec<DataType>> = string_types
            .iter()
            .cloned()
            .map(|string_type| vec![string_type, DataType::Int64])
            .collect();
        assert_eq!(signature.get_example_types(), expected);

        // Variadic: one single-argument example per listed type.
        let signature = TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]);
        assert_eq!(
            signature.get_example_types(),
            vec![vec![DataType::Int32], vec![DataType::Int64]]
        );

        // Numeric: every numeric type, repeated for both arguments.
        let signature = TypeSignature::Numeric(2);
        let expected: Vec<Vec<DataType>> = [
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Float32,
            DataType::Float64,
        ]
        .into_iter()
        .map(|numeric_type| vec![numeric_type; 2])
        .collect();
        assert_eq!(signature.get_example_types(), expected);

        // String: every string type, repeated for both arguments.
        let signature = TypeSignature::String(2);
        let expected: Vec<Vec<DataType>> = string_types
            .iter()
            .cloned()
            .map(|string_type| vec![string_type; 2])
            .collect();
        assert_eq!(signature.get_example_types(), expected);
    }
}