datafusion_expr_common/signature.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Function signatures: [`Volatility`], [`Signature`] and [`TypeSignature`]
19
20use std::fmt::Display;
21use std::hash::Hash;
22
23use crate::type_coercion::aggregates::NUMERICS;
24use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
25use datafusion_common::internal_err;
26use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
27use datafusion_common::utils::ListCoercion;
28use indexmap::IndexSet;
29use itertools::Itertools;
30
/// Placeholder timezone that stands for *any* valid timezone.
///
/// Use it where a function can accept a timestamp type with an arbitrary
/// timezone, so that the possible timezones do not have to be enumerated.
/// See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions are executed with timestamp
/// arrays that carry a valid time zone. Functions must never return results
/// that use this placeholder timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
40
/// Placeholder length that stands for *any* valid fixed size list.
///
/// Use it where a function can accept a fixed size list type of arbitrary
/// length, so that the possible list lengths do not have to be enumerated.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
45
/// Describes how a function's output may vary when its input is held fixed.
///
/// Volatility decides which optimizations a function is eligible for. Always
/// declare the strictest volatility that is correct for your function: this
/// maximizes performance and avoids unexpected results.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// The same input always produces the same output.
    ///
    /// DataFusion inlines immutable functions during planning. For example,
    /// `abs` is immutable, so `abs(-1)` is evaluated and replaced with `1`
    /// while planning instead of invoking the function at runtime.
    Immutable,
    /// The same input produces the same output within a single query, but
    /// the output may differ from one query to the next.
    ///
    /// For example, `now()` is stable: the query
    /// `select col1, now() from t1` returns different results each time it
    /// is run, yet within one run `now()` has the same value for every
    /// output row.
    ///
    /// DataFusion inlines `Stable` functions when possible. For example,
    /// they are inlined when planning a query for execution, but not in View
    /// definitions or prepared statements.
    Stable,
    /// The output may change from one evaluation to the next.
    ///
    /// Several invocations of a volatile function within the same query may
    /// return different results on different rows; `random()` is an example.
    ///
    /// DataFusion can not evaluate such functions during planning, nor push
    /// predicates containing them into scans. In
    /// `select col1, random() from t1`, `random()` is evaluated for every
    /// output row, yielding a unique random value per row.
    Volatile,
}
87
88/// The types of arguments for which a function has implementations.
89///
90/// [`TypeSignature`] **DOES NOT** define the types that a user query could call the
91/// function with. DataFusion will automatically coerce (cast) argument types to
92/// one of the supported function signatures, if possible.
93///
94/// # Overview
95/// Functions typically provide implementations for a small number of different
96/// argument [`DataType`]s, rather than all possible combinations. If a user
97/// calls a function with arguments that do not match any of the declared types,
98/// DataFusion will attempt to automatically coerce (add casts to) function
99/// arguments so they match the [`TypeSignature`]. See the [`type_coercion`] module
100/// for more details
101///
102/// # Example: Numeric Functions
103/// For example, a function like `cos` may only provide an implementation for
104/// [`DataType::Float64`]. When users call `cos` with a different argument type,
105/// such as `cos(int_column)`, and type coercion automatically adds a cast such
106/// as `cos(CAST int_column AS DOUBLE)` during planning.
107///
108/// [`type_coercion`]: crate::type_coercion
109///
110/// ## Example: Strings
111///
112/// There are several different string types in Arrow, such as
113/// [`DataType::Utf8`], [`DataType::LargeUtf8`], and [`DataType::Utf8View`].
114///
115/// Some functions may have specialized implementations for these types, while others
116/// may be able to handle only one of them. For example, a function that
117/// only works with [`DataType::Utf8View`] would have the following signature:
118///
119/// ```
120/// # use arrow::datatypes::DataType;
121/// # use datafusion_expr_common::signature::{TypeSignature};
122/// // Declares the function must be invoked with a single argument of type `Utf8View`.
123/// // if a user calls the function with `Utf8` or `LargeUtf8`, DataFusion will
124/// // automatically add a cast to `Utf8View` during planning.
125/// let type_signature = TypeSignature::Exact(vec![DataType::Utf8View]);
126///
127/// ```
128///
129/// # Example: Timestamps
130///
131/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
132/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
133/// the [`TIMEZONE_WILDCARD`]. For example:
134///
135/// ```
136/// # use arrow::datatypes::{DataType, TimeUnit};
137/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
138/// let type_signature = TypeSignature::Exact(vec![
139/// // A nanosecond precision timestamp with ANY timezone
140/// // matches Timestamp(Nanosecond, Some("+0:00"))
141/// // matches Timestamp(Nanosecond, Some("+5:00"))
142/// // does not match Timestamp(Nanosecond, None)
143/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
144/// ]);
145/// ```
146#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
147pub enum TypeSignature {
148 /// One or more arguments of a common type out of a list of valid types.
149 ///
150 /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
151 ///
152 /// # Examples
153 ///
154 /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
155 /// DataType::LargeUtf8])`
156 Variadic(Vec<DataType>),
157 /// The acceptable signature and coercions rules are special for this
158 /// function.
159 ///
160 /// If this signature is specified,
161 /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
162 ///
163 /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
164 UserDefined,
165 /// One or more arguments with arbitrary types
166 VariadicAny,
167 /// One or more arguments of an arbitrary but equal type out of a list of valid types.
168 ///
169 /// # Examples
170 ///
171 /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
172 /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
173 Uniform(usize, Vec<DataType>),
174 /// One or more arguments with exactly the specified types in order.
175 ///
176 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
177 Exact(Vec<DataType>),
178 /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
179 ///
180 /// [`Coercion`] contains not only the desired type but also the allowed
181 /// casts. For example, if you expect a function has string type, but you
182 /// also allow it to be casted from binary type.
183 ///
184 /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
185 Coercible(Vec<Coercion>),
186 /// One or more arguments coercible to a single, comparable type.
187 ///
188 /// Each argument will be coerced to a single type using the
189 /// coercion rules described in [`comparison_coercion_numeric`].
190 ///
191 /// # Examples
192 ///
193 /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
194 /// the types will both be coerced to `i64` before the function is invoked.
195 ///
196 /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
197 /// the types will both be coerced to `Utf8` before the function is invoked.
198 ///
199 /// Note:
200 /// - For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
201 /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
202 ///
203 /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
204 Comparable(usize),
205 /// One or more arguments of arbitrary types.
206 ///
207 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
208 Any(usize),
209 /// Matches exactly one of a list of [`TypeSignature`]s.
210 ///
211 /// Coercion is attempted to match the signatures in order, and stops after
212 /// the first success, if any.
213 ///
214 /// # Examples
215 ///
216 /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
217 /// is `OneOf(vec![Any(0), VariadicAny])`.
218 OneOf(Vec<TypeSignature>),
219 /// A function that has an [`ArrayFunctionSignature`]
220 ArraySignature(ArrayFunctionSignature),
221 /// One or more arguments of numeric types.
222 ///
223 /// See [`NativeType::is_numeric`] to know which type is considered numeric
224 ///
225 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
226 ///
227 /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
228 Numeric(usize),
229 /// One or arguments of all the same string types.
230 ///
231 /// The precedence of type from high to low is Utf8View, LargeUtf8 and Utf8.
232 /// Null is considered as `Utf8` by default
233 /// Dictionary with string value type is also handled.
234 ///
235 /// For example, if a function is called with (utf8, large_utf8), all
236 /// arguments will be coerced to `LargeUtf8`
237 ///
238 /// For functions that take no arguments (e.g. `random()` use [`TypeSignature::Nullary`]).
239 String(usize),
240 /// No arguments
241 Nullary,
242}
243
244impl TypeSignature {
245 #[inline]
246 pub fn is_one_of(&self) -> bool {
247 matches!(self, TypeSignature::OneOf(_))
248 }
249}
250
251/// Represents the class of types that can be used in a function signature.
252///
253/// This is used to specify what types are valid for function arguments in a more flexible way than
254/// just listing specific DataTypes. For example, TypeSignatureClass::Timestamp matches any timestamp
255/// type regardless of timezone or precision.
256///
257/// Used primarily with [`TypeSignature::Coercible`] to define function signatures that can accept
258/// arguments that can be coerced to a particular class of types.
259#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
260pub enum TypeSignatureClass {
261 Timestamp,
262 Time,
263 Interval,
264 Duration,
265 Native(LogicalTypeRef),
266 // TODO:
267 // Numeric
268 Integer,
269}
270
271impl Display for TypeSignatureClass {
272 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273 write!(f, "TypeSignatureClass::{self:?}")
274 }
275}
276
277impl TypeSignatureClass {
278 /// Get example acceptable types for this `TypeSignatureClass`
279 ///
280 /// This is used for `information_schema` and can be used to generate
281 /// documentation or error messages.
282 fn get_example_types(&self) -> Vec<DataType> {
283 match self {
284 TypeSignatureClass::Native(l) => get_data_types(l.native()),
285 TypeSignatureClass::Timestamp => {
286 vec![
287 DataType::Timestamp(TimeUnit::Nanosecond, None),
288 DataType::Timestamp(
289 TimeUnit::Nanosecond,
290 Some(TIMEZONE_WILDCARD.into()),
291 ),
292 ]
293 }
294 TypeSignatureClass::Time => {
295 vec![DataType::Time64(TimeUnit::Nanosecond)]
296 }
297 TypeSignatureClass::Interval => {
298 vec![DataType::Interval(IntervalUnit::DayTime)]
299 }
300 TypeSignatureClass::Duration => {
301 vec![DataType::Duration(TimeUnit::Nanosecond)]
302 }
303 TypeSignatureClass::Integer => {
304 vec![DataType::Int64]
305 }
306 }
307 }
308
309 /// Does the specified `NativeType` match this type signature class?
310 pub fn matches_native_type(
311 self: &TypeSignatureClass,
312 logical_type: &NativeType,
313 ) -> bool {
314 if logical_type == &NativeType::Null {
315 return true;
316 }
317
318 match self {
319 TypeSignatureClass::Native(t) if t.native() == logical_type => true,
320 TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
321 TypeSignatureClass::Time if logical_type.is_time() => true,
322 TypeSignatureClass::Interval if logical_type.is_interval() => true,
323 TypeSignatureClass::Duration if logical_type.is_duration() => true,
324 TypeSignatureClass::Integer if logical_type.is_integer() => true,
325 _ => false,
326 }
327 }
328
329 /// What type would `origin_type` be casted to when casting to the specified native type?
330 pub fn default_casted_type(
331 &self,
332 native_type: &NativeType,
333 origin_type: &DataType,
334 ) -> datafusion_common::Result<DataType> {
335 match self {
336 TypeSignatureClass::Native(logical_type) => {
337 logical_type.native().default_cast_for(origin_type)
338 }
339 // If the given type is already a timestamp, we don't change the unit and timezone
340 TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
341 Ok(origin_type.to_owned())
342 }
343 TypeSignatureClass::Time if native_type.is_time() => {
344 Ok(origin_type.to_owned())
345 }
346 TypeSignatureClass::Interval if native_type.is_interval() => {
347 Ok(origin_type.to_owned())
348 }
349 TypeSignatureClass::Duration if native_type.is_duration() => {
350 Ok(origin_type.to_owned())
351 }
352 TypeSignatureClass::Integer if native_type.is_integer() => {
353 Ok(origin_type.to_owned())
354 }
355 _ => internal_err!("May miss the matching logic in `matches_native_type`"),
356 }
357 }
358}
359
360#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
361pub enum ArrayFunctionSignature {
362 /// A function takes at least one List/LargeList/FixedSizeList argument.
363 Array {
364 /// A full list of the arguments accepted by this function.
365 arguments: Vec<ArrayFunctionArgument>,
366 /// Additional information about how array arguments should be coerced.
367 array_coercion: Option<ListCoercion>,
368 },
369 /// A function takes a single argument that must be a List/LargeList/FixedSizeList
370 /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
371 RecursiveArray,
372 /// Specialized Signature for MapArray
373 /// The function takes a single argument that must be a MapArray
374 MapArray,
375}
376
377impl Display for ArrayFunctionSignature {
378 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
379 match self {
380 ArrayFunctionSignature::Array { arguments, .. } => {
381 for (idx, argument) in arguments.iter().enumerate() {
382 write!(f, "{argument}")?;
383 if idx != arguments.len() - 1 {
384 write!(f, ", ")?;
385 }
386 }
387 Ok(())
388 }
389 ArrayFunctionSignature::RecursiveArray => {
390 write!(f, "recursive_array")
391 }
392 ArrayFunctionSignature::MapArray => {
393 write!(f, "map_array")
394 }
395 }
396 }
397}
398
/// The kind of a single argument in an [`ArrayFunctionSignature::Array`]
/// signature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the Array's list
    /// dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
    /// to the same type.
    Array,
    /// A Utf8 argument.
    String,
}
412
413impl Display for ArrayFunctionArgument {
414 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
415 match self {
416 ArrayFunctionArgument::Element => {
417 write!(f, "element")
418 }
419 ArrayFunctionArgument::Index => {
420 write!(f, "index")
421 }
422 ArrayFunctionArgument::Array => {
423 write!(f, "array")
424 }
425 ArrayFunctionArgument::String => {
426 write!(f, "string")
427 }
428 }
429 }
430}
431
432impl TypeSignature {
433 pub fn to_string_repr(&self) -> Vec<String> {
434 match self {
435 TypeSignature::Nullary => {
436 vec!["NullAry()".to_string()]
437 }
438 TypeSignature::Variadic(types) => {
439 vec![format!("{}, ..", Self::join_types(types, "/"))]
440 }
441 TypeSignature::Uniform(arg_count, valid_types) => {
442 vec![
443 std::iter::repeat_n(Self::join_types(valid_types, "/"), *arg_count)
444 .collect::<Vec<String>>()
445 .join(", "),
446 ]
447 }
448 TypeSignature::String(num) => {
449 vec![format!("String({num})")]
450 }
451 TypeSignature::Numeric(num) => {
452 vec![format!("Numeric({num})")]
453 }
454 TypeSignature::Comparable(num) => {
455 vec![format!("Comparable({num})")]
456 }
457 TypeSignature::Coercible(coercions) => {
458 vec![Self::join_types(coercions, ", ")]
459 }
460 TypeSignature::Exact(types) => {
461 vec![Self::join_types(types, ", ")]
462 }
463 TypeSignature::Any(arg_count) => {
464 vec![std::iter::repeat_n("Any", *arg_count)
465 .collect::<Vec<&str>>()
466 .join(", ")]
467 }
468 TypeSignature::UserDefined => {
469 vec!["UserDefined".to_string()]
470 }
471 TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
472 TypeSignature::OneOf(sigs) => {
473 sigs.iter().flat_map(|s| s.to_string_repr()).collect()
474 }
475 TypeSignature::ArraySignature(array_signature) => {
476 vec![array_signature.to_string()]
477 }
478 }
479 }
480
481 /// Helper function to join types with specified delimiter.
482 pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
483 types
484 .iter()
485 .map(|t| t.to_string())
486 .collect::<Vec<String>>()
487 .join(delimiter)
488 }
489
490 /// Check whether 0 input argument is valid for given `TypeSignature`
491 pub fn supports_zero_argument(&self) -> bool {
492 match &self {
493 TypeSignature::Exact(vec) => vec.is_empty(),
494 TypeSignature::Nullary => true,
495 TypeSignature::OneOf(types) => types
496 .iter()
497 .any(|type_sig| type_sig.supports_zero_argument()),
498 _ => false,
499 }
500 }
501
502 /// Returns true if the signature currently supports or used to supported 0
503 /// input arguments in a previous version of DataFusion.
504 pub fn used_to_support_zero_arguments(&self) -> bool {
505 match &self {
506 TypeSignature::Any(num) => *num == 0,
507 _ => self.supports_zero_argument(),
508 }
509 }
510
511 #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
512 pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
513 self.get_example_types()
514 }
515
516 /// Return example acceptable types for this `TypeSignature`'
517 ///
518 /// Returns a `Vec<DataType>` for each argument to the function
519 ///
520 /// This is used for `information_schema` and can be used to generate
521 /// documentation or error messages.
522 pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
523 match self {
524 TypeSignature::Exact(types) => vec![types.clone()],
525 TypeSignature::OneOf(types) => types
526 .iter()
527 .flat_map(|type_sig| type_sig.get_example_types())
528 .collect(),
529 TypeSignature::Uniform(arg_count, types) => types
530 .iter()
531 .cloned()
532 .map(|data_type| vec![data_type; *arg_count])
533 .collect(),
534 TypeSignature::Coercible(coercions) => coercions
535 .iter()
536 .map(|c| {
537 let mut all_types: IndexSet<DataType> =
538 c.desired_type().get_example_types().into_iter().collect();
539
540 if let Some(implicit_coercion) = c.implicit_coercion() {
541 let allowed_casts: Vec<DataType> = implicit_coercion
542 .allowed_source_types
543 .iter()
544 .flat_map(|t| t.get_example_types())
545 .collect();
546 all_types.extend(allowed_casts);
547 }
548
549 all_types.into_iter().collect::<Vec<_>>()
550 })
551 .multi_cartesian_product()
552 .collect(),
553 TypeSignature::Variadic(types) => types
554 .iter()
555 .cloned()
556 .map(|data_type| vec![data_type])
557 .collect(),
558 TypeSignature::Numeric(arg_count) => NUMERICS
559 .iter()
560 .cloned()
561 .map(|numeric_type| vec![numeric_type; *arg_count])
562 .collect(),
563 TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
564 .into_iter()
565 .map(|dt| vec![dt; *arg_count])
566 .collect::<Vec<_>>(),
567 // TODO: Implement for other types
568 TypeSignature::Any(_)
569 | TypeSignature::Comparable(_)
570 | TypeSignature::Nullary
571 | TypeSignature::VariadicAny
572 | TypeSignature::ArraySignature(_)
573 | TypeSignature::UserDefined => vec![],
574 }
575 }
576}
577
578fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
579 match native_type {
580 NativeType::Null => vec![DataType::Null],
581 NativeType::Boolean => vec![DataType::Boolean],
582 NativeType::Int8 => vec![DataType::Int8],
583 NativeType::Int16 => vec![DataType::Int16],
584 NativeType::Int32 => vec![DataType::Int32],
585 NativeType::Int64 => vec![DataType::Int64],
586 NativeType::UInt8 => vec![DataType::UInt8],
587 NativeType::UInt16 => vec![DataType::UInt16],
588 NativeType::UInt32 => vec![DataType::UInt32],
589 NativeType::UInt64 => vec![DataType::UInt64],
590 NativeType::Float16 => vec![DataType::Float16],
591 NativeType::Float32 => vec![DataType::Float32],
592 NativeType::Float64 => vec![DataType::Float64],
593 NativeType::Date => vec![DataType::Date32, DataType::Date64],
594 NativeType::Binary => vec![
595 DataType::Binary,
596 DataType::LargeBinary,
597 DataType::BinaryView,
598 ],
599 NativeType::String => {
600 vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
601 }
602 // TODO: support other native types
603 _ => vec![],
604 }
605}
606
607/// Represents type coercion rules for function arguments, specifying both the desired type
608/// and optional implicit coercion rules for source types.
609///
610/// # Examples
611///
612/// ```
613/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
614/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
615///
616/// // Exact coercion that only accepts timestamp types
617/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
618///
619/// // Implicit coercion that accepts string types but can coerce from binary types
620/// let implicit = Coercion::new_implicit(
621/// TypeSignatureClass::Native(logical_string()),
622/// vec![TypeSignatureClass::Native(logical_binary())],
623/// NativeType::String
624/// );
625/// ```
626///
627/// There are two variants:
628///
629/// * `Exact` - Only accepts arguments that exactly match the desired type
630/// * `Implicit` - Accepts the desired type and can coerce from specified source types
631#[derive(Debug, Clone, Eq, PartialOrd)]
632pub enum Coercion {
633 /// Coercion that only accepts arguments exactly matching the desired type.
634 Exact {
635 /// The required type for the argument
636 desired_type: TypeSignatureClass,
637 },
638
639 /// Coercion that accepts the desired type and can implicitly coerce from other types.
640 Implicit {
641 /// The primary desired type for the argument
642 desired_type: TypeSignatureClass,
643 /// Rules for implicit coercion from other types
644 implicit_coercion: ImplicitCoercion,
645 },
646}
647
648impl Coercion {
649 pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
650 Self::Exact { desired_type }
651 }
652
653 /// Create a new coercion with implicit coercion rules.
654 ///
655 /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
656 /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
657 pub fn new_implicit(
658 desired_type: TypeSignatureClass,
659 allowed_source_types: Vec<TypeSignatureClass>,
660 default_casted_type: NativeType,
661 ) -> Self {
662 Self::Implicit {
663 desired_type,
664 implicit_coercion: ImplicitCoercion {
665 allowed_source_types,
666 default_casted_type,
667 },
668 }
669 }
670
671 pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
672 match self {
673 Coercion::Exact { .. } => &[],
674 Coercion::Implicit {
675 implicit_coercion, ..
676 } => implicit_coercion.allowed_source_types.as_slice(),
677 }
678 }
679
680 pub fn default_casted_type(&self) -> Option<&NativeType> {
681 match self {
682 Coercion::Exact { .. } => None,
683 Coercion::Implicit {
684 implicit_coercion, ..
685 } => Some(&implicit_coercion.default_casted_type),
686 }
687 }
688
689 pub fn desired_type(&self) -> &TypeSignatureClass {
690 match self {
691 Coercion::Exact { desired_type } => desired_type,
692 Coercion::Implicit { desired_type, .. } => desired_type,
693 }
694 }
695
696 pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
697 match self {
698 Coercion::Exact { .. } => None,
699 Coercion::Implicit {
700 implicit_coercion, ..
701 } => Some(implicit_coercion),
702 }
703 }
704}
705
706impl Display for Coercion {
707 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
708 write!(f, "Coercion({}", self.desired_type())?;
709 if let Some(implicit_coercion) = self.implicit_coercion() {
710 write!(f, ", implicit_coercion={implicit_coercion}",)
711 } else {
712 write!(f, ")")
713 }
714 }
715}
716
717impl PartialEq for Coercion {
718 fn eq(&self, other: &Self) -> bool {
719 self.desired_type() == other.desired_type()
720 && self.implicit_coercion() == other.implicit_coercion()
721 }
722}
723
724impl Hash for Coercion {
725 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
726 self.desired_type().hash(state);
727 self.implicit_coercion().hash(state);
728 }
729}
730
731/// Defines rules for implicit type coercion, specifying which source types can be
732/// coerced and the default type to use when coercing.
733///
734/// This is used by functions to specify which types they can accept via implicit
735/// coercion in addition to their primary desired type.
736///
737/// # Examples
738///
739/// ```
740/// use arrow::datatypes::TimeUnit;
741///
742/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
743/// use datafusion_common::types::{NativeType, logical_binary};
744///
745/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
746/// let implicit = Coercion::new_implicit(
747/// TypeSignatureClass::Timestamp,
748/// vec![TypeSignatureClass::Native(logical_binary())],
749/// NativeType::Timestamp(TimeUnit::Second, None),
750/// );
751/// ```
752#[derive(Debug, Clone, Eq, PartialOrd)]
753pub struct ImplicitCoercion {
754 /// The types that can be coerced from via implicit casting
755 allowed_source_types: Vec<TypeSignatureClass>,
756
757 /// The default type to use when coercing from allowed source types.
758 /// This is particularly important for types like Timestamp that have multiple
759 /// possible configurations (different time units and timezones).
760 default_casted_type: NativeType,
761}
762
763impl Display for ImplicitCoercion {
764 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
765 write!(
766 f,
767 "ImplicitCoercion({:?}, default_type={:?})",
768 self.allowed_source_types, self.default_casted_type
769 )
770 }
771}
772
773impl PartialEq for ImplicitCoercion {
774 fn eq(&self, other: &Self) -> bool {
775 self.allowed_source_types == other.allowed_source_types
776 && self.default_casted_type == other.default_casted_type
777 }
778}
779
780impl Hash for ImplicitCoercion {
781 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
782 self.allowed_source_types.hash(state);
783 self.default_casted_type.hash(state);
784 }
785}
786
787/// Provides information necessary for calling a function.
788///
789/// - [`TypeSignature`] defines the argument types that a function has implementations
790/// for.
791///
792/// - [`Volatility`] defines how the output of the function changes with the input.
793#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
794pub struct Signature {
795 /// The data types that the function accepts. See [TypeSignature] for more information.
796 pub type_signature: TypeSignature,
797 /// The volatility of the function. See [Volatility] for more information.
798 pub volatility: Volatility,
799}
800
801impl Signature {
802 /// Creates a new Signature from a given type signature and volatility.
803 pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
804 Signature {
805 type_signature,
806 volatility,
807 }
808 }
809 /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
810 pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
811 Self {
812 type_signature: TypeSignature::Variadic(common_types),
813 volatility,
814 }
815 }
816 /// User-defined coercion rules for the function.
817 pub fn user_defined(volatility: Volatility) -> Self {
818 Self {
819 type_signature: TypeSignature::UserDefined,
820 volatility,
821 }
822 }
823
824 /// A specified number of numeric arguments
825 pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
826 Self {
827 type_signature: TypeSignature::Numeric(arg_count),
828 volatility,
829 }
830 }
831
832 /// A specified number of numeric arguments
833 pub fn string(arg_count: usize, volatility: Volatility) -> Self {
834 Self {
835 type_signature: TypeSignature::String(arg_count),
836 volatility,
837 }
838 }
839
840 /// An arbitrary number of arguments of any type.
841 pub fn variadic_any(volatility: Volatility) -> Self {
842 Self {
843 type_signature: TypeSignature::VariadicAny,
844 volatility,
845 }
846 }
847 /// A fixed number of arguments of the same type, from those listed in `valid_types`.
848 pub fn uniform(
849 arg_count: usize,
850 valid_types: Vec<DataType>,
851 volatility: Volatility,
852 ) -> Self {
853 Self {
854 type_signature: TypeSignature::Uniform(arg_count, valid_types),
855 volatility,
856 }
857 }
858 /// Exactly matches the types in `exact_types`, in order.
859 pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
860 Signature {
861 type_signature: TypeSignature::Exact(exact_types),
862 volatility,
863 }
864 }
865
866 /// Target coerce types in order
867 pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
868 Self {
869 type_signature: TypeSignature::Coercible(target_types),
870 volatility,
871 }
872 }
873
874 /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
875 pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
876 Self {
877 type_signature: TypeSignature::Comparable(arg_count),
878 volatility,
879 }
880 }
881
882 pub fn nullary(volatility: Volatility) -> Self {
883 Signature {
884 type_signature: TypeSignature::Nullary,
885 volatility,
886 }
887 }
888
889 /// A specified number of arguments of any type
890 pub fn any(arg_count: usize, volatility: Volatility) -> Self {
891 Signature {
892 type_signature: TypeSignature::Any(arg_count),
893 volatility,
894 }
895 }
896
897 /// Any one of a list of [TypeSignature]s.
898 pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
899 Signature {
900 type_signature: TypeSignature::OneOf(type_signatures),
901 volatility,
902 }
903 }
904
905 /// Specialized [Signature] for ArrayAppend and similar functions.
906 pub fn array_and_element(volatility: Volatility) -> Self {
907 Signature {
908 type_signature: TypeSignature::ArraySignature(
909 ArrayFunctionSignature::Array {
910 arguments: vec![
911 ArrayFunctionArgument::Array,
912 ArrayFunctionArgument::Element,
913 ],
914 array_coercion: Some(ListCoercion::FixedSizedListToList),
915 },
916 ),
917 volatility,
918 }
919 }
920
921 /// Specialized [Signature] for ArrayPrepend and similar functions.
922 pub fn element_and_array(volatility: Volatility) -> Self {
923 Signature {
924 type_signature: TypeSignature::ArraySignature(
925 ArrayFunctionSignature::Array {
926 arguments: vec![
927 ArrayFunctionArgument::Element,
928 ArrayFunctionArgument::Array,
929 ],
930 array_coercion: Some(ListCoercion::FixedSizedListToList),
931 },
932 ),
933 volatility,
934 }
935 }
936
937 /// Specialized [Signature] for functions that take a fixed number of arrays.
938 pub fn arrays(
939 n: usize,
940 coercion: Option<ListCoercion>,
941 volatility: Volatility,
942 ) -> Self {
943 Signature {
944 type_signature: TypeSignature::ArraySignature(
945 ArrayFunctionSignature::Array {
946 arguments: vec![ArrayFunctionArgument::Array; n],
947 array_coercion: coercion,
948 },
949 ),
950 volatility,
951 }
952 }
953
954 /// Specialized [Signature] for Array functions with an optional index.
955 pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
956 Signature {
957 type_signature: TypeSignature::OneOf(vec![
958 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
959 arguments: vec![
960 ArrayFunctionArgument::Array,
961 ArrayFunctionArgument::Element,
962 ],
963 array_coercion: None,
964 }),
965 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
966 arguments: vec![
967 ArrayFunctionArgument::Array,
968 ArrayFunctionArgument::Element,
969 ArrayFunctionArgument::Index,
970 ],
971 array_coercion: None,
972 }),
973 ]),
974 volatility,
975 }
976 }
977
978 /// Specialized [Signature] for ArrayElement and similar functions.
979 pub fn array_and_index(volatility: Volatility) -> Self {
980 Signature {
981 type_signature: TypeSignature::ArraySignature(
982 ArrayFunctionSignature::Array {
983 arguments: vec![
984 ArrayFunctionArgument::Array,
985 ArrayFunctionArgument::Index,
986 ],
987 array_coercion: Some(ListCoercion::FixedSizedListToList),
988 },
989 ),
990 volatility,
991 }
992 }
993
994 /// Specialized [Signature] for ArrayEmpty and similar functions.
995 pub fn array(volatility: Volatility) -> Self {
996 Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility)
997 }
998}
999
#[cfg(test)]
mod tests {
    use datafusion_common::types::{logical_int64, logical_string};

    use super::*;

    /// Builds one two-element row per input type, repeating that type twice.
    fn same_type_pairs(types: Vec<DataType>) -> Vec<Vec<DataType>> {
        types
            .into_iter()
            .map(|data_type| vec![data_type.clone(), data_type])
            .collect()
    }

    /// Checks `TypeSignature::supports_zero_argument` against signatures that
    /// should and should not accept an empty argument list.
    #[test]
    fn supports_zero_argument_tests() {
        // `TypeSignature`s that are expected to support 0 args.
        let zero_arg_signatures = vec![
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        zero_arg_signatures.iter().for_each(|case| {
            assert!(
                case.supports_zero_argument(),
                "Expected {case:?} to support zero arguments"
            );
        });

        // `TypeSignature`s that are expected not to support 0 args.
        let non_zero_arg_signatures = vec![
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        non_zero_arg_signatures.iter().for_each(|case| {
            assert!(
                !case.supports_zero_argument(),
                "Expected {case:?} not to support zero arguments"
            );
        });
    }

    /// Validates that a partial order is defined for `TypeSignature`.
    #[test]
    fn type_signature_partial_ord() {
        // Comparisons between different variants.
        let user_defined = TypeSignature::UserDefined;
        assert!(user_defined < TypeSignature::VariadicAny);
        assert!(user_defined < TypeSignature::Any(1));

        // Comparisons involving `Uniform`, including against another variant.
        let uniform_one_null = TypeSignature::Uniform(1, vec![DataType::Null]);
        assert!(uniform_one_null < TypeSignature::Uniform(1, vec![DataType::Boolean]));
        assert!(uniform_one_null < TypeSignature::Uniform(2, vec![DataType::Null]));

        let uniform_max_null = TypeSignature::Uniform(usize::MAX, vec![DataType::Null]);
        assert!(uniform_max_null < TypeSignature::Exact(vec![DataType::Null]));
    }

    /// Checks the rows produced by `TypeSignature::get_example_types` for a
    /// range of signature variants.
    #[test]
    fn test_get_possible_types() {
        // `Exact` yields a single row.
        let exact = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]);
        assert_eq!(
            exact.get_example_types(),
            vec![vec![DataType::Int32, DataType::Int64]]
        );

        // `OneOf` with two alternatives yields one row per alternative.
        let one_of_two = TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
            TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
        ]);
        assert_eq!(
            one_of_two.get_example_types(),
            vec![
                vec![DataType::Int32, DataType::Int64],
                vec![DataType::Float32, DataType::Float64]
            ]
        );

        // `OneOf` with three alternatives of differing lengths.
        let one_of_three = TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
            TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
            TypeSignature::Exact(vec![DataType::Utf8]),
        ]);
        assert_eq!(
            one_of_three.get_example_types(),
            vec![
                vec![DataType::Int32, DataType::Int64],
                vec![DataType::Float32, DataType::Float64],
                vec![DataType::Utf8]
            ]
        );

        // `Uniform` repeats each valid type across both positions.
        let uniform = TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]);
        assert_eq!(
            uniform.get_example_types(),
            same_type_pairs(vec![DataType::Float32, DataType::Int64])
        );

        // `Coercible` over (string, int64): one row per string type.
        let coercible = TypeSignature::Coercible(vec![
            Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
            Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
        ]);
        let expected_coercible: Vec<Vec<DataType>> =
            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
                .into_iter()
                .map(|string_type| vec![string_type, DataType::Int64])
                .collect();
        assert_eq!(coercible.get_example_types(), expected_coercible);

        // `Variadic` yields a single-element row per listed type.
        let variadic = TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]);
        assert_eq!(
            variadic.get_example_types(),
            vec![vec![DataType::Int32], vec![DataType::Int64]]
        );

        // `Numeric(2)` yields one same-type pair per numeric type, in this order.
        let numeric = TypeSignature::Numeric(2);
        assert_eq!(
            numeric.get_example_types(),
            same_type_pairs(vec![
                DataType::Int8,
                DataType::Int16,
                DataType::Int32,
                DataType::Int64,
                DataType::UInt8,
                DataType::UInt16,
                DataType::UInt32,
                DataType::UInt64,
                DataType::Float32,
                DataType::Float64,
            ])
        );

        // `String(2)` yields one same-type pair per string type, in this order.
        let string = TypeSignature::String(2);
        assert_eq!(
            string.get_example_types(),
            same_type_pairs(vec![
                DataType::Utf8,
                DataType::LargeUtf8,
                DataType::Utf8View,
            ])
        );
    }
}