use core::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::sync::Arc;
use itertools::Itertools;
use vortex_utils::aliases::hash_set::HashSet;
use crate::dtype::DType;
use crate::dtype::FieldName;
/// One step of a [`FieldPath`]: either a field selected by name or the element type of a list.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Field {
/// A field identified by its name; `FieldPath::resolve` only follows it through struct dtypes.
Name(FieldName),
/// The element type of a list dtype; rendered as `[]` by the `Display` impl.
ElementType,
}
impl Field {
    /// Returns the field name as a string slice, or `None` for [`Field::ElementType`].
    pub fn as_name(&self) -> Option<&str> {
        let Field::Name(label) = self else {
            return None;
        };
        Some(label.as_ref())
    }

    /// Returns `true` if this is a [`Field::Name`].
    pub fn is_named(&self) -> bool {
        match self {
            Field::Name(_) => true,
            Field::ElementType => false,
        }
    }
}
impl From<&str> for Field {
fn from(value: &str) -> Self {
Field::Name(value.into())
}
}
impl From<Arc<str>> for Field {
fn from(value: Arc<str>) -> Self {
Self::Name(FieldName::from(value))
}
}
impl From<FieldName> for Field {
fn from(value: FieldName) -> Self {
Self::Name(value)
}
}
/// Renders a named field as `$name` and the list element type as `[]`.
impl Display for Field {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Field::ElementType => f.write_str("[]"),
            Field::Name(label) => write!(f, "${}", label),
        }
    }
}
/// An ordered sequence of [`Field`]s describing how to descend from a root dtype to a nested one.
///
/// The empty path (see [`FieldPath::root`]) has no parts; resolving it returns the dtype it is
/// given unchanged.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FieldPath(Vec<Field>);
/// Builds a `FieldPath` from one or more dot-separated identifiers.
///
/// Each identifier is stringified and becomes a named field, so `field_path!(a.b.c)`
/// is the path with the named parts `a`, `b` and `c`, in that order.
#[macro_export]
macro_rules! field_path {
    ($head:ident $(. $tail:ident)*) => {{
        $crate::dtype::FieldPath::from_name(stringify!($head))
            $(.push(stringify!($tail)))*
    }};
}
impl FieldPath {
    /// Creates the empty path, i.e. a path with no parts.
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Creates a single-part path that selects the field called `name`.
    pub fn from_name<F: Into<FieldName>>(name: F) -> Self {
        let head = Field::Name(name.into());
        Self(vec![head])
    }

    /// Returns the parts of this path in the order they are applied by [`FieldPath::resolve`].
    pub fn parts(&self) -> &[Field] {
        self.0.as_slice()
    }

    /// Returns `true` when the path has no parts.
    pub fn is_root(&self) -> bool {
        self.parts().is_empty()
    }

    /// Appends `field` to the end of the path and returns the extended path.
    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
        let part = field.into();
        self.0.push(part);
        self
    }

    /// Returns `true` if the first part of this path equals `field`.
    ///
    /// # Panics
    ///
    /// Panics if `field` is not a [`Field::Name`], or if this path is empty or its
    /// first part is not a [`Field::Name`].
    pub fn starts_with_field(&self, field: &Field) -> bool {
        assert!(matches!(field, Field::Name(_)));
        let first = self.0.first();
        assert!(matches!(first, Some(Field::Name(_))));
        first == Some(field)
    }

    /// Drops the first part of the path and returns the remainder.
    ///
    /// Returns `None` when the path is already empty.
    pub fn step_into(mut self) -> Option<Self> {
        if self.is_root() {
            return None;
        }
        // The path is non-empty here, so removing index 0 cannot panic.
        self.0.remove(0);
        Some(self)
    }

    /// Follows this path through `dtype` and returns the dtype it ends at.
    ///
    /// A named part is looked up in a struct dtype and an element-type part steps
    /// into a list dtype. Any other combination, or a missing struct field, yields `None`.
    /// An empty path returns `dtype` itself.
    pub fn resolve(&self, dtype: DType) -> Option<DType> {
        self.0
            .iter()
            .try_fold(dtype, |current, part| match (current, part) {
                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name),
                (DType::List(element, _), Field::ElementType) => Some(element.as_ref().clone()),
                _ => None,
            })
    }

    /// Returns `true` if this path can be resolved within `dtype`.
    pub fn exists_in(&self, dtype: DType) -> bool {
        self.resolve(dtype).is_some()
    }

    /// Returns `true` if one of the two paths is a prefix of the other.
    ///
    /// Only the leading parts up to the length of the shorter path are compared, so
    /// an empty path overlaps with every path.
    pub fn overlap(&self, other: &FieldPath) -> bool {
        // `zip` stops at the shorter of the two paths.
        self.0
            .iter()
            .zip(other.0.iter())
            .all(|(mine, theirs)| mine == theirs)
    }
}
impl FromIterator<Field> for FieldPath {
fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
FieldPath(iter.into_iter().collect())
}
}
impl From<Field> for FieldPath {
fn from(value: Field) -> Self {
FieldPath(vec![value])
}
}
impl From<Vec<Field>> for FieldPath {
fn from(value: Vec<Field>) -> Self {
FieldPath(value)
}
}
/// Renders the parts of the path joined by `.`; an empty path renders as an empty string.
impl Display for FieldPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let joined = self.0.iter().format(".");
        Display::fmt(&joined, f)
    }
}
/// A collection of [`FieldPath`]s stored in a hash set.
#[derive(Default, Clone, Debug)]
pub struct FieldPathSet {
// The paths that are members of this set.
set: HashSet<FieldPath>,
}
impl FieldPathSet {
    /// Returns `true` if `path` is a member of this set.
    pub fn contains(&self, path: &FieldPath) -> bool {
        let Self { set } = self;
        set.contains(path)
    }
}
impl FromIterator<FieldPath> for FieldPathSet {
fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
let set = HashSet::from_iter(iter);
Self { set }
}
}
// Unit tests for `Field`, `FieldPath` and the `field_path!` macro.
#[cfg(test)]
mod tests {
use super::*;
use crate::dtype::DType;
use crate::dtype::Nullability::*;
use crate::dtype::PType;
use crate::dtype::StructFields;
// Building a path with `push` and from a `Vec<Field>` gives equal paths with the same rendering.
#[test]
fn test_field_path() {
let path = FieldPath::from_name("A").push("B").push("C");
assert_eq!(path.to_string(), "$A.$B.$C");
let fields = vec!["A", "B", "C"]
.into_iter()
.map(Field::from)
.collect_vec();
assert_eq!(path.parts(), &fields);
let vec_path = FieldPath::from(fields);
assert_eq!(vec_path.to_string(), "$A.$B.$C");
assert_eq!(path, vec_path);
}
// A one-part path resolves to the dtype of the matching top-level struct field.
#[test]
fn nested_field_single_level() {
let a_type = DType::Primitive(PType::I32, NonNullable);
let dtype = DType::struct_(
[("a", a_type.clone()), ("b", DType::Bool(Nullable))],
NonNullable,
);
let path = FieldPath::from_name("a");
assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
assert!(path.exists_in(dtype));
}
// A two-part path resolves through a nested struct.
#[test]
fn nested_field_two_level() {
let inner = DType::struct_(
[
("inner_a", DType::Primitive(PType::U8, NonNullable)),
("inner_b", DType::Bool(Nullable)),
],
NonNullable,
);
let outer = DType::Struct(
StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
NonNullable,
);
let path = FieldPath::from_name("outer_b").push("inner_a");
let dtype = path.resolve(outer.clone()).unwrap();
assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
assert!(path.exists_in(outer));
}
// Resolution through struct -> struct -> list -> struct, plus paths that fail to resolve.
#[test]
fn nested_field_deep_nested() {
let level1 = DType::struct_(
[(
"a",
DType::struct_(
[(
"b",
DType::list(
DType::struct_(
[("c", DType::Primitive(PType::F64, Nullable))],
NonNullable,
),
Nullable,
),
)],
NonNullable,
),
)],
NonNullable,
);
// The element-type step is placed where the list is, so the path resolves.
let path = FieldPath::from_name("a")
.push("b")
.push(Field::ElementType)
.push("c");
let dtype = path.resolve(level1.clone()).unwrap();
assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
assert!(path.exists_in(level1.clone()));
// A named step applied to the list `b` does not resolve.
let path = FieldPath::from_name("a")
.push("b")
.push("c")
.push(Field::ElementType);
assert!(path.resolve(level1.clone()).is_none());
assert!(!path.exists_in(level1.clone()));
// An element-type step applied to the struct `a` does not resolve.
let path = FieldPath::from_name("a")
.push(Field::ElementType)
.push("b")
.push("c");
assert!(path.resolve(level1.clone()).is_none());
assert!(!path.exists_in(level1.clone()));
// Omitting the element-type step entirely does not resolve either.
let path = FieldPath::root().push("a").push("b").push("c");
assert!(path.resolve(level1.clone()).is_none());
assert!(!path.exists_in(level1));
}
// A missing field name, or an element-type step on a struct, yields `None`.
#[test]
fn nested_field_not_found() {
let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
let path = field_path!(b);
assert!(path.resolve(dtype.clone()).is_none());
assert!(!path.exists_in(dtype.clone()));
let path = FieldPath::from(Field::ElementType);
assert!(path.resolve(dtype.clone()).is_none());
assert!(!path.exists_in(dtype));
}
// Stepping further into a primitive dtype yields `None` for both kinds of step.
#[test]
fn nested_field_non_struct_intermediate() {
let dtype = DType::struct_(
[("a", DType::Primitive(PType::I32, NonNullable))],
NonNullable,
);
let path = field_path!(a.b);
assert!(path.resolve(dtype.clone()).is_none());
assert!(!path.exists_in(dtype.clone()));
let path = FieldPath::from_name("a").push(Field::ElementType);
assert!(path.resolve(dtype.clone()).is_none());
assert!(!path.exists_in(dtype));
}
// Paths overlap, in both directions, when one is a prefix of the other.
#[test]
fn test_overlap_positive() {
let path1 = field_path!(a.b.c);
let path2 = field_path!(a.b);
assert!(path1.overlap(&path2));
assert!(path2.overlap(&path1));
let path3 = field_path!(a);
assert!(path1.overlap(&path3));
assert!(path3.overlap(&path1));
}
// Paths that differ within their common length do not overlap.
#[test]
fn test_overlap_negative() {
let path1 = field_path!(a.b.c);
let path2 = field_path!(a.x.y);
assert!(!path1.overlap(&path2));
assert!(!path2.overlap(&path1));
let path3 = field_path!(x);
assert!(!path1.overlap(&path3));
assert!(!path3.overlap(&path1));
}
}