use std::fmt::Display;
use std::hash::Hash;
use crate::type_coercion::aggregates::NUMERICS;
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
use datafusion_common::internal_err;
use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
use datafusion_common::utils::ListCoercion;
use indexmap::IndexSet;
use itertools::Itertools;
/// Placeholder time-zone string. Within this file it is used as the zone of the
/// zoned example `Timestamp` produced for `TypeSignatureClass::Timestamp`.
///
/// NOTE(review): presumably treated as "any time zone" when signatures are
/// matched — confirm against the coercion code, which is not in this file.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
/// Sentinel `i32` value (`i32::MIN`); no code visible in this file reads it.
///
/// NOTE(review): presumably stands for "any length" of a fixed-size list —
/// confirm against callers.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
/// Volatility category stored alongside a [`TypeSignature`] in a [`Signature`].
///
/// The derived `Ord` follows declaration order, so
/// `Immutable < Stable < Volatile`.
///
/// NOTE(review): the per-variant descriptions below use the conventional
/// database meaning of these terms; nothing in this file enforces them —
/// confirm against the code that consumes `Signature::volatility`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
/// Presumably: the same inputs always produce the same output.
Immutable,
/// Presumably: the output is fixed within one query but may differ between queries.
Stable,
/// Presumably: the output may change on every evaluation.
Volatile,
}
/// The shapes of argument lists a function can declare.
///
/// [`TypeSignature::to_string_repr`] renders each variant as text and
/// [`TypeSignature::get_example_types`] produces sample argument lists. The
/// matching and coercion of actual call arguments is not implemented in this
/// file.
///
/// The derived `PartialOrd` compares variants by declaration order first, so
/// reordering variants changes comparison results.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
/// Candidate types for a variable-length argument list; rendered as
/// `T1/T2, ..`.
Variadic(Vec<DataType>),
/// Carries no description of the arguments; rendered as `UserDefined`.
/// NOTE(review): presumably the function supplies its own coercion logic — confirm.
UserDefined,
/// Carries neither a count nor types; rendered as `Any, .., Any`.
VariadicAny,
/// An argument count plus candidate types; example argument lists repeat a
/// single candidate type for every position.
Uniform(usize, Vec<DataType>),
/// The argument types, in order. An empty list counts as supporting zero
/// arguments.
Exact(Vec<DataType>),
/// One [`Coercion`] rule per argument position.
Coercible(Vec<Coercion>),
/// Argument count (named `arg_count` in [`Signature::comparable`]).
/// NOTE(review): presumably the arguments must be mutually comparable — confirm.
Comparable(usize),
/// Argument count; each position is rendered as `Any`.
Any(usize),
/// A set of alternative signatures; text forms and examples are the
/// concatenation of those of each alternative.
OneOf(Vec<TypeSignature>),
/// An array-oriented signature described by [`ArrayFunctionSignature`].
ArraySignature(ArrayFunctionSignature),
/// Argument count; example argument lists repeat one type from `NUMERICS`.
Numeric(usize),
/// Argument count; example argument lists repeat one string type
/// (`Utf8`, `LargeUtf8` or `Utf8View`).
String(usize),
/// No arguments; rendered as `NullAry()`.
Nullary,
}
impl TypeSignature {
#[inline]
pub fn is_one_of(&self) -> bool {
matches!(self, TypeSignature::OneOf(_))
}
}
/// Categories of argument types used by [`Coercion`] rules.
///
/// [`TypeSignatureClass::matches_native_type`] defines which logical types
/// belong to each category; `NativeType::Null` is accepted by every category.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
/// Logical types for which `NativeType::is_timestamp` is true.
Timestamp,
/// Logical types for which `NativeType::is_time` is true.
Time,
/// Logical types for which `NativeType::is_interval` is true.
Interval,
/// Logical types for which `NativeType::is_duration` is true.
Duration,
/// Exactly the native type of the wrapped logical type.
Native(LogicalTypeRef),
/// Logical types for which `NativeType::is_integer` is true.
Integer,
}
impl Display for TypeSignatureClass {
    /// Writes the `Debug` form of the value prefixed with
    /// `TypeSignatureClass::`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("TypeSignatureClass::{self:?}"))
    }
}
impl TypeSignatureClass {
fn get_example_types(&self) -> Vec<DataType> {
match self {
TypeSignatureClass::Native(l) => get_data_types(l.native()),
TypeSignatureClass::Timestamp => {
vec![
DataType::Timestamp(TimeUnit::Nanosecond, None),
DataType::Timestamp(
TimeUnit::Nanosecond,
Some(TIMEZONE_WILDCARD.into()),
),
]
}
TypeSignatureClass::Time => {
vec![DataType::Time64(TimeUnit::Nanosecond)]
}
TypeSignatureClass::Interval => {
vec![DataType::Interval(IntervalUnit::DayTime)]
}
TypeSignatureClass::Duration => {
vec![DataType::Duration(TimeUnit::Nanosecond)]
}
TypeSignatureClass::Integer => {
vec![DataType::Int64]
}
}
}
pub fn matches_native_type(
self: &TypeSignatureClass,
logical_type: &NativeType,
) -> bool {
if logical_type == &NativeType::Null {
return true;
}
match self {
TypeSignatureClass::Native(t) if t.native() == logical_type => true,
TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
TypeSignatureClass::Time if logical_type.is_time() => true,
TypeSignatureClass::Interval if logical_type.is_interval() => true,
TypeSignatureClass::Duration if logical_type.is_duration() => true,
TypeSignatureClass::Integer if logical_type.is_integer() => true,
_ => false,
}
}
pub fn default_casted_type(
&self,
native_type: &NativeType,
origin_type: &DataType,
) -> datafusion_common::Result<DataType> {
match self {
TypeSignatureClass::Native(logical_type) => {
logical_type.native().default_cast_for(origin_type)
}
TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
Ok(origin_type.to_owned())
}
TypeSignatureClass::Time if native_type.is_time() => {
Ok(origin_type.to_owned())
}
TypeSignatureClass::Interval if native_type.is_interval() => {
Ok(origin_type.to_owned())
}
TypeSignatureClass::Duration if native_type.is_duration() => {
Ok(origin_type.to_owned())
}
TypeSignatureClass::Integer if native_type.is_integer() => {
Ok(origin_type.to_owned())
}
_ => internal_err!("May miss the matching logic in `matches_native_type`"),
}
}
}
/// Array-oriented signatures carried by `TypeSignature::ArraySignature`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
/// An ordered list of argument roles plus an optional list coercion.
/// Displayed as the comma-separated roles; the coercion is not displayed.
Array {
/// Role of each argument, in call order.
arguments: Vec<ArrayFunctionArgument>,
/// Optional list coercion mode.
/// NOTE(review): presumably applied to array arguments during type
/// coercion — confirm against the coercion code.
array_coercion: Option<ListCoercion>,
},
/// Displayed as `recursive_array`.
/// NOTE(review): presumably a single, possibly nested, array argument — confirm.
RecursiveArray,
/// Displayed as `map_array`.
/// NOTE(review): presumably a single map argument — confirm.
MapArray,
}
impl Display for ArrayFunctionSignature {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrayFunctionSignature::Array { arguments, .. } => {
for (idx, argument) in arguments.iter().enumerate() {
write!(f, "{argument}")?;
if idx != arguments.len() - 1 {
write!(f, ", ")?;
}
}
Ok(())
}
ArrayFunctionSignature::RecursiveArray => {
write!(f, "recursive_array")
}
ArrayFunctionSignature::MapArray => {
write!(f, "map_array")
}
}
}
}
/// Role of one argument inside an `ArrayFunctionSignature::Array`.
///
/// The `Display` form is the lowercase variant name.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// Displayed as `element`.
    Element,
    /// Displayed as `index`.
    Index,
    /// Displayed as `array`.
    Array,
    /// Displayed as `string`.
    String,
}

impl Display for ArrayFunctionArgument {
    /// Writes the lowercase name of the role.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Element => "element",
            Self::Index => "index",
            Self::Array => "array",
            Self::String => "string",
        };
        f.write_str(label)
    }
}
impl TypeSignature {
/// Renders this signature as human-readable text.
///
/// Every variant produces exactly one string, except `OneOf`, which
/// concatenates the strings of all its alternatives in order.
pub fn to_string_repr(&self) -> Vec<String> {
    let repr = match self {
        // The only variant that can produce more (or fewer) than one entry.
        Self::OneOf(sigs) => {
            let mut reprs = Vec::new();
            for sig in sigs {
                reprs.extend(sig.to_string_repr());
            }
            return reprs;
        }
        Self::Nullary => "NullAry()".to_string(),
        Self::Variadic(types) => format!("{}, ..", Self::join_types(types, "/")),
        Self::Uniform(arg_count, valid_types) => {
            vec![Self::join_types(valid_types, "/"); *arg_count].join(", ")
        }
        Self::String(num) => format!("String({num})"),
        Self::Numeric(num) => format!("Numeric({num})"),
        Self::Comparable(num) => format!("Comparable({num})"),
        Self::Coercible(coercions) => Self::join_types(coercions, ", "),
        Self::Exact(types) => Self::join_types(types, ", "),
        Self::Any(arg_count) => vec!["Any"; *arg_count].join(", "),
        Self::UserDefined => "UserDefined".to_string(),
        Self::VariadicAny => "Any, .., Any".to_string(),
        Self::ArraySignature(array_signature) => array_signature.to_string(),
    };
    vec![repr]
}
/// Joins the `Display` form of every item in `types`, placing `delimiter`
/// between consecutive items. An empty slice yields an empty string.
pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
    let mut joined = String::new();
    for (position, item) in types.iter().enumerate() {
        if position > 0 {
            joined.push_str(delimiter);
        }
        joined.push_str(&item.to_string());
    }
    joined
}
pub fn supports_zero_argument(&self) -> bool {
match &self {
TypeSignature::Exact(vec) => vec.is_empty(),
TypeSignature::Nullary => true,
TypeSignature::OneOf(types) => types
.iter()
.any(|type_sig| type_sig.supports_zero_argument()),
_ => false,
}
}
pub fn used_to_support_zero_arguments(&self) -> bool {
match &self {
TypeSignature::Any(num) => *num == 0,
_ => self.supports_zero_argument(),
}
}
#[deprecated(since = "46.0.0", note = "See get_example_types instead")]
pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
self.get_example_types()
}
pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
match self {
TypeSignature::Exact(types) => vec![types.clone()],
TypeSignature::OneOf(types) => types
.iter()
.flat_map(|type_sig| type_sig.get_example_types())
.collect(),
TypeSignature::Uniform(arg_count, types) => types
.iter()
.cloned()
.map(|data_type| vec![data_type; *arg_count])
.collect(),
TypeSignature::Coercible(coercions) => coercions
.iter()
.map(|c| {
let mut all_types: IndexSet<DataType> =
c.desired_type().get_example_types().into_iter().collect();
if let Some(implicit_coercion) = c.implicit_coercion() {
let allowed_casts: Vec<DataType> = implicit_coercion
.allowed_source_types
.iter()
.flat_map(|t| t.get_example_types())
.collect();
all_types.extend(allowed_casts);
}
all_types.into_iter().collect::<Vec<_>>()
})
.multi_cartesian_product()
.collect(),
TypeSignature::Variadic(types) => types
.iter()
.cloned()
.map(|data_type| vec![data_type])
.collect(),
TypeSignature::Numeric(arg_count) => NUMERICS
.iter()
.cloned()
.map(|numeric_type| vec![numeric_type; *arg_count])
.collect(),
TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
.into_iter()
.map(|dt| vec![dt; *arg_count])
.collect::<Vec<_>>(),
TypeSignature::Any(_)
| TypeSignature::Comparable(_)
| TypeSignature::Nullary
| TypeSignature::VariadicAny
| TypeSignature::ArraySignature(_)
| TypeSignature::UserDefined => vec![],
}
}
}
/// Maps a logical native type to the arrow `DataType`s used as its examples.
///
/// Primitive types map to their single arrow counterpart; `Date`, `Binary`
/// and `String` map to several physical encodings. Any native type not
/// listed here yields an empty list.
fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
    // Shared by all the one-to-one mappings.
    let only = |data_type: DataType| vec![data_type];

    match native_type {
        // Logical types with more than one physical encoding.
        NativeType::String => {
            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
        }
        NativeType::Binary => vec![
            DataType::Binary,
            DataType::LargeBinary,
            DataType::BinaryView,
        ],
        NativeType::Date => vec![DataType::Date32, DataType::Date64],
        // One-to-one mappings.
        NativeType::Null => only(DataType::Null),
        NativeType::Boolean => only(DataType::Boolean),
        NativeType::Int8 => only(DataType::Int8),
        NativeType::Int16 => only(DataType::Int16),
        NativeType::Int32 => only(DataType::Int32),
        NativeType::Int64 => only(DataType::Int64),
        NativeType::UInt8 => only(DataType::UInt8),
        NativeType::UInt16 => only(DataType::UInt16),
        NativeType::UInt32 => only(DataType::UInt32),
        NativeType::UInt64 => only(DataType::UInt64),
        NativeType::Float16 => only(DataType::Float16),
        NativeType::Float32 => only(DataType::Float32),
        NativeType::Float64 => only(DataType::Float64),
        _ => Vec::new(),
    }
}
/// Rule for one argument position of a `TypeSignature::Coercible` signature.
///
/// `PartialEq` and `Hash` are implemented by hand elsewhere in this file from
/// `desired_type()` and `implicit_coercion()`; `PartialOrd` is derived.
#[derive(Debug, Clone, Eq, PartialOrd)]
pub enum Coercion {
/// Only the desired class; no implicit coercion rule is attached.
Exact {
/// Class the argument is expected to belong to.
desired_type: TypeSignatureClass,
},
/// The desired class plus an [`ImplicitCoercion`] listing other source
/// classes and a default cast type.
Implicit {
/// Class the argument is expected to belong to.
desired_type: TypeSignatureClass,
/// Additional source classes and the default cast type.
implicit_coercion: ImplicitCoercion,
},
}
impl Coercion {
    /// Builds a [`Coercion::Exact`] rule for `desired_type`.
    pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
        Self::Exact { desired_type }
    }

    /// Builds a [`Coercion::Implicit`] rule: `desired_type` plus the classes
    /// in `allowed_source_types` and the `default_casted_type` stored with
    /// them.
    pub fn new_implicit(
        desired_type: TypeSignatureClass,
        allowed_source_types: Vec<TypeSignatureClass>,
        default_casted_type: NativeType,
    ) -> Self {
        let implicit_coercion = ImplicitCoercion {
            allowed_source_types,
            default_casted_type,
        };
        Self::Implicit {
            desired_type,
            implicit_coercion,
        }
    }

    /// Source classes listed by the implicit rule; empty for `Exact`.
    pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
        match self.implicit_coercion() {
            Some(rule) => &rule.allowed_source_types,
            None => &[],
        }
    }

    /// Default cast type stored in the implicit rule; `None` for `Exact`.
    pub fn default_casted_type(&self) -> Option<&NativeType> {
        self.implicit_coercion()
            .map(|rule| &rule.default_casted_type)
    }

    /// The desired class, present on both variants.
    pub fn desired_type(&self) -> &TypeSignatureClass {
        match self {
            Self::Exact { desired_type } | Self::Implicit { desired_type, .. } => {
                desired_type
            }
        }
    }

    /// The attached [`ImplicitCoercion`], or `None` for `Exact`.
    pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
        if let Self::Implicit {
            implicit_coercion, ..
        } = self
        {
            Some(implicit_coercion)
        } else {
            None
        }
    }
}
impl Display for Coercion {
    /// Writes `Coercion(<desired_type>)` for an exact rule and
    /// `Coercion(<desired_type>, implicit_coercion=<rule>)` for an implicit
    /// one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Coercion({}", self.desired_type())?;
        if let Some(implicit_coercion) = self.implicit_coercion() {
            write!(f, ", implicit_coercion={implicit_coercion}")?;
        }
        // Close the parenthesis on both paths; the implicit branch used to
        // return without it, leaving the output unbalanced.
        write!(f, ")")
    }
}
impl PartialEq for Coercion {
    /// Two rules are equal when their desired classes are equal and their
    /// implicit rules are either both absent or equal.
    fn eq(&self, other: &Self) -> bool {
        let lhs = (self.desired_type(), self.implicit_coercion());
        let rhs = (other.desired_type(), other.implicit_coercion());
        lhs == rhs
    }
}
impl Hash for Coercion {
    /// Feeds the desired class and then the optional implicit rule into
    /// `state` — the same two values that `eq` compares.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let desired_type = self.desired_type();
        let implicit_coercion = self.implicit_coercion();
        Hash::hash(desired_type, state);
        Hash::hash(&implicit_coercion, state);
    }
}
/// The implicit part of a `Coercion::Implicit` rule.
///
/// Both fields are private; within this file they are set by
/// `Coercion::new_implicit` and read through `Coercion::allowed_source_types`
/// and `Coercion::default_casted_type`.
#[derive(Debug, Clone, Eq, PartialOrd)]
pub struct ImplicitCoercion {
// Source classes accepted in addition to the rule's desired class.
allowed_source_types: Vec<TypeSignatureClass>,
// NOTE(review): presumably the type that arguments of an allowed source
// class are cast to — confirm against the coercion code.
default_casted_type: NativeType,
}
impl Display for ImplicitCoercion {
    /// Writes `ImplicitCoercion(<sources>, default_type=<type>)` using the
    /// `Debug` form of both fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            allowed_source_types,
            default_casted_type,
        } = self;
        write!(
            f,
            "ImplicitCoercion({allowed_source_types:?}, default_type={default_casted_type:?})"
        )
    }
}
impl PartialEq for ImplicitCoercion {
    /// Field-by-field equality: source classes first, then the default cast
    /// type.
    fn eq(&self, other: &Self) -> bool {
        let Self {
            allowed_source_types,
            default_casted_type,
        } = self;
        *allowed_source_types == other.allowed_source_types
            && *default_casted_type == other.default_casted_type
    }
}
impl Hash for ImplicitCoercion {
    /// Hashes the source classes and then the default cast type — the same
    /// fields, in the same order, that `eq` compares.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let Self {
            allowed_source_types,
            default_casted_type,
        } = self;
        allowed_source_types.hash(state);
        default_casted_type.hash(state);
    }
}
/// A function signature: the accepted argument shapes together with the
/// function's volatility.
///
/// Both fields are public; the constructors on `Signature` are shorthands
/// that fill in `type_signature` for each `TypeSignature` variant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Signature {
/// The accepted argument shapes.
pub type_signature: TypeSignature,
/// The volatility category of the function.
pub volatility: Volatility,
}
impl Signature {
pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
Signature {
type_signature,
volatility,
}
}
pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::Variadic(common_types),
volatility,
}
}
pub fn user_defined(volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::UserDefined,
volatility,
}
}
pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::Numeric(arg_count),
volatility,
}
}
pub fn string(arg_count: usize, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::String(arg_count),
volatility,
}
}
pub fn variadic_any(volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::VariadicAny,
volatility,
}
}
pub fn uniform(
arg_count: usize,
valid_types: Vec<DataType>,
volatility: Volatility,
) -> Self {
Self {
type_signature: TypeSignature::Uniform(arg_count, valid_types),
volatility,
}
}
pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::Exact(exact_types),
volatility,
}
}
pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::Coercible(target_types),
volatility,
}
}
pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
Self {
type_signature: TypeSignature::Comparable(arg_count),
volatility,
}
}
pub fn nullary(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::Nullary,
volatility,
}
}
pub fn any(arg_count: usize, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::Any(arg_count),
volatility,
}
}
pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::OneOf(type_signatures),
volatility,
}
}
pub fn array_and_element(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
},
),
volatility,
}
}
pub fn element_and_array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Array,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
},
),
volatility,
}
}
pub fn arrays(
n: usize,
coercion: Option<ListCoercion>,
volatility: Volatility,
) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![ArrayFunctionArgument::Array; n],
array_coercion: coercion,
},
),
volatility,
}
}
pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::OneOf(vec![
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: None,
}),
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Index,
],
array_coercion: None,
}),
]),
volatility,
}
}
pub fn array_and_index(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Index,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
},
),
volatility,
}
}
pub fn array(volatility: Volatility) -> Self {
Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility)
}
}
#[cfg(test)]
mod tests {
use datafusion_common::types::{logical_int64, logical_string};
use super::*;
/// `supports_zero_argument` accepts `Nullary`, empty `Exact`, and `OneOf`
/// containing such an alternative — and rejects everything else.
#[test]
fn supports_zero_argument_tests() {
    let accepting = [
        TypeSignature::Exact(vec![]),
        TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int8]),
            TypeSignature::Nullary,
            TypeSignature::Uniform(1, vec![DataType::Int8]),
        ]),
        TypeSignature::Nullary,
    ];
    for case in &accepting {
        assert!(
            case.supports_zero_argument(),
            "Expected {case:?} to support zero arguments"
        );
    }

    let rejecting = [
        TypeSignature::Exact(vec![DataType::Utf8]),
        TypeSignature::Uniform(1, vec![DataType::Float64]),
        TypeSignature::Any(1),
        TypeSignature::VariadicAny,
        TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int8]),
            TypeSignature::Uniform(1, vec![DataType::Int8]),
        ]),
    ];
    for case in &rejecting {
        assert!(
            !case.supports_zero_argument(),
            "Expected {case:?} not to support zero arguments"
        );
    }
}
/// The derived `PartialOrd` orders by variant declaration order first, then
/// by the variant's fields from left to right.
#[test]
fn type_signature_partial_ord() {
    let strictly_increasing_pairs = [
        // Different variants: declaration order decides.
        (TypeSignature::UserDefined, TypeSignature::VariadicAny),
        (TypeSignature::UserDefined, TypeSignature::Any(1)),
        // Same variant, same count: the type list decides.
        (
            TypeSignature::Uniform(1, vec![DataType::Null]),
            TypeSignature::Uniform(1, vec![DataType::Boolean]),
        ),
        // Same variant: the count is compared before the type list.
        (
            TypeSignature::Uniform(1, vec![DataType::Null]),
            TypeSignature::Uniform(2, vec![DataType::Null]),
        ),
        // Variant order wins over any field value.
        (
            TypeSignature::Uniform(usize::MAX, vec![DataType::Null]),
            TypeSignature::Exact(vec![DataType::Null]),
        ),
    ];

    for (smaller, larger) in &strictly_increasing_pairs {
        assert!(
            smaller < larger,
            "Expected {smaller:?} to sort before {larger:?}"
        );
    }
}
/// Checks the argument lists produced by `get_example_types` for each
/// variant that yields examples.
#[test]
fn test_get_possible_types() {
    // Exact: the type list itself.
    assert_eq!(
        TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]).get_example_types(),
        vec![vec![DataType::Int32, DataType::Int64]]
    );

    // OneOf: alternatives concatenated in order (two alternatives).
    let two_alternatives = TypeSignature::OneOf(vec![
        TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
        TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
    ]);
    assert_eq!(
        two_alternatives.get_example_types(),
        vec![
            vec![DataType::Int32, DataType::Int64],
            vec![DataType::Float32, DataType::Float64]
        ]
    );

    // OneOf: three alternatives with differing arity.
    let three_alternatives = TypeSignature::OneOf(vec![
        TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
        TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
        TypeSignature::Exact(vec![DataType::Utf8]),
    ]);
    assert_eq!(
        three_alternatives.get_example_types(),
        vec![
            vec![DataType::Int32, DataType::Int64],
            vec![DataType::Float32, DataType::Float64],
            vec![DataType::Utf8]
        ]
    );

    // Uniform: each candidate type repeated for every argument.
    assert_eq!(
        TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64])
            .get_example_types(),
        vec![
            vec![DataType::Float32, DataType::Float32],
            vec![DataType::Int64, DataType::Int64]
        ]
    );

    // Coercible: cartesian product of the per-position candidates.
    let coercible = TypeSignature::Coercible(vec![
        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
        Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
    ]);
    assert_eq!(
        coercible.get_example_types(),
        vec![
            vec![DataType::Utf8, DataType::Int64],
            vec![DataType::LargeUtf8, DataType::Int64],
            vec![DataType::Utf8View, DataType::Int64]
        ]
    );

    // Variadic: one single-argument list per candidate type.
    assert_eq!(
        TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]).get_example_types(),
        vec![vec![DataType::Int32], vec![DataType::Int64]]
    );

    // Numeric: every numeric type repeated for every argument.
    assert_eq!(
        TypeSignature::Numeric(2).get_example_types(),
        vec![
            vec![DataType::Int8, DataType::Int8],
            vec![DataType::Int16, DataType::Int16],
            vec![DataType::Int32, DataType::Int32],
            vec![DataType::Int64, DataType::Int64],
            vec![DataType::UInt8, DataType::UInt8],
            vec![DataType::UInt16, DataType::UInt16],
            vec![DataType::UInt32, DataType::UInt32],
            vec![DataType::UInt64, DataType::UInt64],
            vec![DataType::Float32, DataType::Float32],
            vec![DataType::Float64, DataType::Float64]
        ]
    );

    // String: every string encoding repeated for every argument.
    assert_eq!(
        TypeSignature::String(2).get_example_types(),
        vec![
            vec![DataType::Utf8, DataType::Utf8],
            vec![DataType::LargeUtf8, DataType::LargeUtf8],
            vec![DataType::Utf8View, DataType::Utf8View]
        ]
    );
}
}