use crate::{SyntaxShape, ast::PathMember};
use serde::{Deserialize, Serialize};
use std::{borrow::Cow, fmt::Display};
#[cfg(test)]
use strum_macros::EnumIter;
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, Hash, Ord, PartialOrd)]
#[cfg_attr(test, derive(EnumIter))]
pub enum Type {
Any,
Binary,
Block,
Bool,
CellPath,
Closure,
Custom(Box<str>),
Date,
Duration,
Error,
Filesize,
Float,
Int,
List(Box<Type>),
#[default]
Nothing,
Number,
OneOf(Box<[Type]>),
Range,
Record(Box<[(String, Type)]>),
String,
Glob,
Table(Box<[(String, Type)]>),
}
fn follow_cell_path_recursive<'a>(
current: Cow<'a, Type>,
path_members: &mut dyn Iterator<Item = &'a PathMember>,
) -> Option<Cow<'a, Type>> {
let Some(first) = path_members.next() else {
return Some(current);
};
match (current.as_ref(), first) {
(Type::Record(fields), PathMember::String { val, .. }) => {
let idx = fields.iter().position(|(name, _)| name == val)?;
let next = match current {
Cow::Borrowed(Type::Record(f)) => Cow::Borrowed(&f[idx].1),
Cow::Owned(Type::Record(f)) => Cow::Owned(f[idx].1.to_owned()),
_ => unreachable!(),
};
follow_cell_path_recursive(next, path_members)
}
(Type::Table(f), PathMember::Int { .. }) => {
follow_cell_path_recursive(Cow::Owned(Type::Record(f.clone())), path_members)
}
(Type::Table(fields), PathMember::String { val, .. }) => {
let (_, sub_type) = fields.iter().find(|(name, _)| name == val)?;
let list_type = Type::List(Box::new(sub_type.clone()));
follow_cell_path_recursive(Cow::Owned(list_type), path_members)
}
(Type::List(_), PathMember::Int { .. }) => {
let next = match current {
Cow::Borrowed(Type::List(i)) => Cow::Borrowed(i.as_ref()),
Cow::Owned(Type::List(i)) => Cow::Owned(*i),
_ => unreachable!(),
};
follow_cell_path_recursive(next, path_members)
}
(Type::List(_), PathMember::String { .. }) => {
let next = match current {
Cow::Borrowed(Type::List(i)) => Cow::Borrowed(i.as_ref()),
Cow::Owned(Type::List(i)) => Cow::Owned(*i),
_ => unreachable!(),
};
let mut found_int_member = false;
let mut new_iter = std::iter::once(first).chain(path_members).filter(|pm| {
let first_int = !found_int_member && matches!(pm, PathMember::Int { .. });
if first_int {
found_int_member = true;
}
!first_int
});
let inner_ty = follow_cell_path_recursive(next, &mut new_iter);
if found_int_member {
inner_ty
} else {
inner_ty.map(|inner_ty| Cow::Owned(Type::List(Box::new(inner_ty.into_owned()))))
}
}
_ => None,
}
}
impl Type {
pub fn list(inner: Type) -> Self {
Self::List(Box::new(inner))
}
pub fn one_of(types: impl IntoIterator<Item = Type>) -> Self {
let mut flattened = Vec::new();
for t in types {
Self::oneof_add(&mut flattened, t);
}
Self::OneOf(flattened.into())
}
pub fn record() -> Self {
Self::Record([].into())
}
pub fn table() -> Self {
Self::Table([].into())
}
pub fn custom(name: impl Into<Box<str>>) -> Self {
Self::Custom(name.into())
}
pub fn is_subtype_of(&self, other: &Type) -> bool {
let is_subtype_collection = |this: &[(String, Type)], that: &[(String, Type)]| {
if this.is_empty() || that.is_empty() {
true
} else if this.len() < that.len() {
false
} else {
that.iter().all(|(col_y, ty_y)| {
if let Some((_, ty_x)) = this.iter().find(|(col_x, _)| col_x == col_y) {
ty_x.is_subtype_of(ty_y)
} else {
false
}
})
}
};
match (self, other) {
(t, u) if t == u => true,
(_, Type::Any) => true,
(Type::String | Type::Int, Type::CellPath) => true,
(Type::OneOf(oneof), Type::CellPath) => {
oneof.iter().all(|t| t.is_subtype_of(&Type::CellPath))
}
(Type::Float | Type::Int, Type::Number) => true,
(Type::Glob, Type::String) | (Type::String, Type::Glob) => true,
(Type::List(t), Type::List(u)) if t.is_subtype_of(u) => true, (Type::Record(this), Type::Record(that)) | (Type::Table(this), Type::Table(that)) => {
is_subtype_collection(this, that)
}
(Type::Table(_), Type::List(that)) if matches!(**that, Type::Any) => true,
(Type::Table(this), Type::List(that)) => {
matches!(that.as_ref(), Type::Record(that) if is_subtype_collection(this, that))
}
(Type::List(this), Type::Table(that)) => {
matches!(this.as_ref(), Type::Record(this) if is_subtype_collection(this, that))
}
(Type::OneOf(this), that @ Type::OneOf(_)) => {
this.iter().all(|t| t.is_subtype_of(that))
}
(this, Type::OneOf(that)) => that.iter().any(|t| this.is_subtype_of(t)),
_ => false,
}
}
fn flat_widen(lhs: Type, rhs: Type) -> Result<Type, (Type, Type)> {
if lhs == rhs {
return Ok(lhs);
}
if matches!(lhs, Type::Any) || matches!(rhs, Type::Any) {
return Ok(Type::Any);
}
if matches!(lhs, Type::Int | Type::Float | Type::Number)
&& matches!(rhs, Type::Int | Type::Float | Type::Number)
{
return Ok(Type::Number);
}
if (matches!(lhs, Type::Glob) && matches!(rhs, Type::String))
|| (matches!(lhs, Type::String) && matches!(rhs, Type::Glob))
{
return Err((lhs, rhs));
}
match (&lhs, &rhs) {
(Type::Record(this), Type::Record(that)) => {
let widened = Self::widen_collection(this.clone(), that.clone());
return Ok(Type::Record(widened));
}
(Type::Table(this), Type::Table(that)) => {
let widened = Self::widen_collection(this.clone(), that.clone());
return Ok(Type::Table(widened));
}
(Type::List(_list_item), Type::Table(_table))
| (Type::Table(_table), Type::List(_list_item)) => {
let item = match (lhs, rhs) {
(Type::List(list_item), Type::Table(table)) => match *list_item {
Type::Record(record) => Type::Record(Self::widen_collection(record, table)),
list_item => Type::one_of([list_item, Type::Record(table)]),
},
(Type::Table(table), Type::List(list_item)) => match *list_item {
Type::Record(record) => Type::Record(Self::widen_collection(record, table)),
list_item => Type::one_of([list_item, Type::Record(table)]),
},
_ => unreachable!(),
};
return Ok(Type::List(Box::new(item)));
}
(Type::List(lhs), Type::List(rhs)) => {
let lhs_inner = lhs.clone();
let rhs_inner = rhs.clone();
return Ok(Type::list(lhs_inner.widen(*rhs_inner)));
}
_ => {}
}
if lhs.is_subtype_of(&rhs) {
return Ok(rhs);
}
if rhs.is_subtype_of(&lhs) {
return Ok(lhs);
}
Err((lhs, rhs))
}
fn widen_collection(
lhs: Box<[(String, Type)]>,
rhs: Box<[(String, Type)]>,
) -> Box<[(String, Type)]> {
if lhs.is_empty() || rhs.is_empty() {
return [].into();
}
let (small, big) = if lhs.len() <= rhs.len() {
(lhs, rhs)
} else {
(rhs, lhs)
};
const MAP_THRESH: usize = 16;
if big.len() > MAP_THRESH {
use std::collections::HashMap;
let mut big_map: HashMap<String, Type> = big.into_iter().collect();
small
.into_iter()
.filter_map(|(col, typ)| big_map.remove(&col).map(|b_typ| (col, typ.widen(b_typ))))
.collect()
} else {
small
.into_iter()
.filter_map(|(col, typ)| {
big.iter()
.find_map(|(b_col, b_typ)| (&col == b_col).then(|| b_typ.clone()))
.map(|b_typ| (col, typ.widen(b_typ)))
})
.collect()
}
}
pub fn widen(self, other: Type) -> Type {
fn shortcut_allowed(lhs: &Type, rhs: &Type) -> bool {
!matches!(
(lhs, rhs),
(Type::List(_), Type::Table(_)) | (Type::Table(_), Type::List(_))
)
}
if self.is_subtype_of(&other)
&& !other.is_subtype_of(&self)
&& shortcut_allowed(&self, &other)
{
return other;
}
let tu = match Self::flat_widen(self, other) {
Ok(t) => return t,
Err(tu) => tu,
};
match tu {
(Type::OneOf(ts), Type::OneOf(us)) => {
let (big, small) = match ts.len() >= us.len() {
true => (ts, us),
false => (us, ts),
};
let mut out = big.into_vec();
for t in small.into_iter() {
Self::oneof_add_widen(&mut out, t);
}
Type::one_of(out)
}
(Type::OneOf(oneof), t) | (t, Type::OneOf(oneof)) => {
let mut out = oneof.into_vec();
Self::oneof_add_widen(&mut out, t);
Type::one_of(out)
}
(this, other) => Type::one_of([this, other]),
}
}
fn oneof_add_widen(oneof: &mut Vec<Type>, mut t: Type) {
if let Type::OneOf(inner) = t {
for sub_t in inner.into_vec() {
Self::oneof_add_widen(oneof, sub_t);
}
return;
}
let mut i = 0;
while i < oneof.len() {
let one = std::mem::replace(&mut oneof[i], Type::Any);
match Self::flat_widen(one, t) {
Ok(one_t) => {
oneof[i] = one_t;
return;
}
Err((one_old, t_old)) => {
oneof[i] = one_old;
t = t_old; i += 1;
}
}
}
oneof.push(t);
}
fn oneof_add(oneof: &mut Vec<Type>, t: Type) {
match t {
Type::OneOf(inner) => {
for sub_t in inner.into_vec() {
Self::oneof_add(oneof, sub_t);
}
}
t => {
if !oneof.contains(&t) {
oneof.push(t);
}
}
}
}
pub fn supertype_of(it: impl IntoIterator<Item = Type>) -> Option<Self> {
let mut it = it.into_iter();
it.next().and_then(|head| {
it.try_fold(head, |acc, e| match acc.widen(e) {
Type::Any => None,
r => Some(r),
})
})
}
pub fn is_numeric(&self) -> bool {
matches!(self, Type::Int | Type::Float | Type::Number)
}
pub fn is_list(&self) -> bool {
matches!(self, Type::List(_))
}
pub fn accepts_cell_paths(&self) -> bool {
matches!(self, Type::List(_) | Type::Record(_) | Type::Table(_))
}
pub fn to_shape(&self) -> SyntaxShape {
let mk_shape = |tys: &[(String, Type)]| {
tys.iter()
.map(|(key, val)| (key.clone(), val.to_shape()))
.collect()
};
match self {
Type::Int => SyntaxShape::Int,
Type::Float => SyntaxShape::Float,
Type::Range => SyntaxShape::Range,
Type::Bool => SyntaxShape::Boolean,
Type::String => SyntaxShape::String,
Type::Block => SyntaxShape::Block, Type::Closure => SyntaxShape::Closure(None), Type::CellPath => SyntaxShape::CellPath,
Type::Duration => SyntaxShape::Duration,
Type::Date => SyntaxShape::DateTime,
Type::Filesize => SyntaxShape::Filesize,
Type::List(x) => SyntaxShape::List(Box::new(x.to_shape())),
Type::Number => SyntaxShape::Number,
Type::OneOf(types) => SyntaxShape::OneOf(types.iter().map(Type::to_shape).collect()),
Type::Nothing => SyntaxShape::Nothing,
Type::Record(entries) => SyntaxShape::Record(mk_shape(entries)),
Type::Table(columns) => SyntaxShape::Table(mk_shape(columns)),
Type::Any => SyntaxShape::Any,
Type::Error => SyntaxShape::Any,
Type::Binary => SyntaxShape::Binary,
Type::Custom(_) => SyntaxShape::Any,
Type::Glob => SyntaxShape::GlobPattern,
}
}
pub fn get_non_specified_string(&self) -> String {
match self {
Type::Closure => String::from("closure"),
Type::Bool => String::from("bool"),
Type::Block => String::from("block"),
Type::CellPath => String::from("cell-path"),
Type::Date => String::from("datetime"),
Type::Duration => String::from("duration"),
Type::Filesize => String::from("filesize"),
Type::Float => String::from("float"),
Type::Int => String::from("int"),
Type::Range => String::from("range"),
Type::Record(_) => String::from("record"),
Type::Table(_) => String::from("table"),
Type::List(_) => String::from("list"),
Type::Nothing => String::from("nothing"),
Type::Number => String::from("number"),
Type::OneOf(_) => String::from("oneof"),
Type::String => String::from("string"),
Type::Any => String::from("any"),
Type::Error => String::from("error"),
Type::Binary => String::from("binary"),
Type::Custom(_) => String::from("custom"),
Type::Glob => String::from("glob"),
}
}
pub fn follow_cell_path<'a>(&'a self, path_members: &'a [PathMember]) -> Option<Cow<'a, Self>> {
follow_cell_path_recursive(Cow::Borrowed(self), &mut path_members.iter())
}
}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Type::Block => write!(f, "block"),
Type::Closure => write!(f, "closure"),
Type::Bool => write!(f, "bool"),
Type::CellPath => write!(f, "cell-path"),
Type::Date => write!(f, "datetime"),
Type::Duration => write!(f, "duration"),
Type::Filesize => write!(f, "filesize"),
Type::Float => write!(f, "float"),
Type::Int => write!(f, "int"),
Type::Range => write!(f, "range"),
Type::Record(fields) => {
if fields.is_empty() {
write!(f, "record")
} else {
write!(
f,
"record<{}>",
fields
.iter()
.map(|(x, y)| format!("{x}: {y}"))
.collect::<Vec<String>>()
.join(", "),
)
}
}
Type::Table(columns) => {
if columns.is_empty() {
write!(f, "table")
} else {
write!(
f,
"table<{}>",
columns
.iter()
.map(|(x, y)| format!("{x}: {y}"))
.collect::<Vec<String>>()
.join(", ")
)
}
}
Type::List(l) => write!(f, "list<{l}>"),
Type::Nothing => write!(f, "nothing"),
Type::Number => write!(f, "number"),
Type::OneOf(types) => {
write!(f, "oneof")?;
let [first, rest @ ..] = &**types else {
return Ok(());
};
write!(f, "<{first}")?;
for t in rest {
write!(f, ", {t}")?;
}
f.write_str(">")
}
Type::String => write!(f, "string"),
Type::Any => write!(f, "any"),
Type::Error => write!(f, "error"),
Type::Binary => write!(f, "binary"),
Type::Custom(custom) => write!(f, "{custom}"),
Type::Glob => write!(f, "glob"),
}
}
}
pub fn combined_type_string(types: &[Type], join_word: &str) -> Option<String> {
use std::fmt::Write as _;
let mut seen = Vec::new();
for t in types {
if !seen.contains(t) {
seen.push(t.clone());
}
}
match seen.as_slice() {
[] => None,
[one] => Some(one.to_string()),
[one, two] => Some(format!("{one} {join_word} {two}")),
[initial @ .., last] => {
let mut out = String::new();
for ele in initial {
let _ = write!(out, "{ele}, ");
}
let _ = write!(out, "{join_word} {last}");
Some(out)
}
}
}
#[cfg(test)]
mod tests {
use super::Type;
use strum::IntoEnumIterator;
mod subtype_relation {
use super::*;
#[test]
fn test_reflexivity() {
for ty in Type::iter() {
assert!(ty.is_subtype_of(&ty));
}
}
#[test]
fn test_any_is_top_type() {
for ty in Type::iter() {
assert!(ty.is_subtype_of(&Type::Any));
}
}
#[test]
fn test_number_supertype() {
assert!(Type::Int.is_subtype_of(&Type::Number));
assert!(Type::Float.is_subtype_of(&Type::Number));
}
#[test]
fn test_list_covariance() {
for ty1 in Type::iter() {
for ty2 in Type::iter() {
let list_ty1 = Type::List(Box::new(ty1.clone()));
let list_ty2 = Type::List(Box::new(ty2.clone()));
assert_eq!(list_ty1.is_subtype_of(&list_ty2), ty1.is_subtype_of(&ty2));
}
}
}
}
mod oneof_flattening {
use super::*;
#[test]
fn test_oneof_creation_flattens() {
let nested = Type::one_of([
Type::String,
Type::one_of([Type::Int, Type::Float]),
Type::Bool,
]);
if let Type::OneOf(types) = nested {
let types_vec = types.to_vec();
assert_eq!(types_vec.len(), 4);
assert!(types_vec.contains(&Type::String));
assert!(types_vec.contains(&Type::Int));
assert!(types_vec.contains(&Type::Float));
assert!(types_vec.contains(&Type::Bool));
} else {
panic!("Expected OneOf");
}
}
#[test]
fn test_widen_flattens_oneof() {
let a = Type::one_of([Type::String, Type::Int]);
let b = Type::one_of([Type::Float, Type::Bool]);
let widened = a.widen(b);
if let Type::OneOf(types) = widened {
let types_vec = types.to_vec();
assert_eq!(types_vec.len(), 3);
assert!(types_vec.contains(&Type::String));
assert!(types_vec.contains(&Type::Number)); assert!(types_vec.contains(&Type::Bool));
} else {
panic!("Expected OneOf");
}
}
#[test]
fn test_oneof_deduplicates() {
let record_type =
Type::Record(vec![("content".to_string(), Type::list(Type::String))].into());
let oneof = Type::one_of([Type::String, record_type.clone(), record_type.clone()]);
if let Type::OneOf(types) = oneof {
let types_vec = types.to_vec();
assert_eq!(types_vec.len(), 2);
assert!(types_vec.contains(&Type::String));
assert!(types_vec.contains(&record_type));
} else {
panic!("Expected OneOf");
}
}
}
mod widen_shortcuts {
use super::*;
#[test]
fn test_widen_subtype_shortcut() {
let union = Type::one_of([Type::String, Type::Number]);
let result = union.clone().widen(Type::Int);
assert_eq!(result, union);
let union2 = Type::one_of([Type::Int, Type::String]);
let result2 = Type::Int.widen(union2.clone());
assert_eq!(result2, union2);
}
#[test]
fn test_chain_shortcut() {
let mut t = Type::String;
for _ in 0..100 {
t = t.widen(Type::Int);
}
let expected = Type::one_of([Type::String, Type::Int]);
assert_eq!(t, expected);
}
#[test]
fn test_list_table_widen_preserves_list() {
let list_record = Type::List(Box::new(Type::Record(
vec![("a".to_string(), Type::Int)].into(),
)));
let table = Type::Table(vec![("a".to_string(), Type::Int)].into());
let widened = list_record.clone().widen(table.clone());
let expected = Type::List(Box::new(Type::Record(
vec![("a".to_string(), Type::Int)].into(),
)));
assert_eq!(widened, expected);
let widened2 = table.widen(list_record.clone());
assert_eq!(widened2, expected);
}
#[test]
fn test_glob_string_union() {
let g = Type::Glob;
let s = Type::String;
let w1 = g.clone().widen(s.clone());
let w2 = s.clone().widen(g.clone());
let expected1 = Type::one_of([Type::Glob, Type::String]);
let expected2 = Type::one_of([Type::String, Type::Glob]);
assert_eq!(w1, expected1);
assert_eq!(w2, expected2);
}
}
}