vortex_dtype/
field.rs

1//! Selectors for fields or elements in (possibly nested) `DType`s
2//!
3//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
4//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
5//! the struct field "parent" which is within the struct field "grandparent".
6
7use core::fmt;
8use std::fmt::{Display, Formatter};
9use std::sync::Arc;
10
11use itertools::Itertools;
12use vortex_error::{VortexResult, vortex_bail};
13use vortex_utils::aliases::hash_set::HashSet;
14
15use crate::DType;
16
/// Selects a nested type within either a struct or a list.
///
/// A `Field` addresses exactly one layer of nesting. Chain several of them in a [`FieldPath`]
/// to reach more deeply nested types.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Field {
    /// Address a field of a [`crate::DType::Struct`] by its name.
    Name(Arc<str>),
    /// Address the element type of a [`crate::DType::List`].
    ElementType,
}
26
27impl From<&str> for Field {
28    fn from(value: &str) -> Self {
29        Field::Name(value.into())
30    }
31}
32
33impl From<Arc<str>> for Field {
34    fn from(value: Arc<str>) -> Self {
35        Self::Name(value)
36    }
37}
38
39impl From<&Arc<str>> for Field {
40    fn from(value: &Arc<str>) -> Self {
41        Self::Name(value.clone())
42    }
43}
44
45impl From<String> for Field {
46    fn from(value: String) -> Self {
47        Field::Name(value.into())
48    }
49}
50
51impl Display for Field {
52    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
53        match self {
54            Field::Name(name) => write!(f, "${name}"),
55            Field::ElementType => write!(f, "[]"),
56        }
57    }
58}
59
60impl Field {
61    /// Returns true if the field is defined by Name
62    pub fn is_named(&self) -> bool {
63        matches!(self, Field::Name(_))
64    }
65}
66
/// A sequence of field selectors representing a path through zero or more layers of `DType`.
///
/// Selectors are stored outermost first: the first [`Field`] is applied to the root `DType`,
/// the next one to the type selected by the first, and so on.
///
/// # Examples
///
/// The empty path references the root:
///
/// ```
/// use vortex_dtype::*;
///
/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
/// ```
// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
//  with a positional index to allow cheap step_into.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FieldPath(Vec<Field>);
85
86impl FieldPath {
87    /// The selector for the root (i.e., the top-level struct itself)
88    pub fn root() -> Self {
89        Self::default()
90    }
91
92    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
93    pub fn from_name<F: Into<Field>>(name: F) -> Self {
94        Self(vec![name.into()])
95    }
96
97    /// Returns the sequence of field selectors that make up this path
98    pub fn path(&self) -> &[Field] {
99        &self.0
100    }
101
102    /// Returns whether this path is a root path.
103    pub fn is_root(&self) -> bool {
104        self.0.is_empty()
105    }
106
107    /// Pushes a new field selector to the end of this path
108    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
109        self.0.push(field.into());
110        self
111    }
112
113    /// Whether the path starts with the given field name
114    /// TODO(joe): handle asserts better.
115    pub fn starts_with_field(&self, field: &Field) -> bool {
116        assert!(matches!(field, Field::Name(_)));
117        let first = self.0.first();
118        assert!(matches!(first, Some(Field::Name(_))));
119        first.is_some_and(|f| f == field)
120    }
121
122    /// Steps into the next field in the path
123    pub fn step_into(mut self) -> Option<Self> {
124        if self.0.is_empty() {
125            return None;
126        }
127        self.0 = self.0.iter().skip(1).cloned().collect();
128        Some(self)
129    }
130
131    /// The dtype, within the given type, to which this field path refers.
132    ///
133    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
134    /// literal nullability of the child.
135    ///
136    /// # Examples
137    ///
138    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
139    ///
140    /// ```
141    /// use std::sync::Arc;
142    ///
143    /// use vortex_dtype::*;
144    /// use vortex_dtype::Nullability::*;
145    ///
146    /// let dtype = DType::Struct(
147    ///     Arc::new(StructFields::from_iter([(
148    ///         "a",
149    ///         DType::List(
150    ///             Arc::new(DType::Struct(
151    ///                 Arc::new(StructFields::from_iter([(
152    ///                     "b",
153    ///                     DType::Primitive(PType::U32, NonNullable),
154    ///                 )])),
155    ///                 NonNullable,
156    ///             )),
157    ///             Nullable,
158    ///         ),
159    ///     )])),
160    ///     NonNullable,
161    /// );
162    ///
163    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
164    /// let resolved = path.resolve(dtype).unwrap();
165    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
166    /// ```
167    pub fn resolve(&self, mut dtype: DType) -> VortexResult<DType> {
168        for field in &self.0 {
169            dtype = match (dtype, field) {
170                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
171                (DType::List(element_dtype, _), Field::ElementType) => DType::clone(&element_dtype),
172                (other, f) => {
173                    vortex_bail!("FieldPath: invalid index {:?} for DType {:?}", f, other)
174                }
175            }
176        }
177
178        Ok(dtype)
179    }
180
181    /// Does the field referenced by the field path exist in the given dtype?
182    pub fn exists(&self, dtype: DType) -> bool {
183        // Indexing a struct type always allocates anyway.
184        self.resolve(dtype).is_ok()
185    }
186}
187
188impl FromIterator<Field> for FieldPath {
189    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
190        FieldPath(iter.into_iter().collect())
191    }
192}
193
194impl From<Field> for FieldPath {
195    fn from(value: Field) -> Self {
196        FieldPath(vec![value])
197    }
198}
199
200impl From<Vec<Field>> for FieldPath {
201    fn from(value: Vec<Field>) -> Self {
202        FieldPath(value)
203    }
204}
205
206impl Display for FieldPath {
207    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
208        Display::fmt(&self.0.iter().format("."), f)
209    }
210}
211
#[derive(Default, Clone, Debug)]
/// Contains a set of field paths, and can answer efficient field path contains queries.
///
/// Membership is by exact path equality: a path is contained only if that same sequence of
/// selectors was added to the set.
pub struct FieldPathSet {
    /// The stored paths. While this is currently a set wrapper it can be replaced with a trie.
    // TODO(joe): this can be replaced with a `FieldPath` trie
    set: HashSet<FieldPath>,
}
219
220impl FieldPathSet {
221    /// Checks if a set contains a field path
222    pub fn contains(&self, path: &FieldPath) -> bool {
223        self.set.contains(path)
224    }
225}
226
227impl FromIterator<FieldPath> for FieldPathSet {
228    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
229        let set = HashSet::from_iter(iter);
230        Self { set }
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Nullability::*;
    use crate::{DType, PType, StructFields};

    /// Paths built by pushing names and paths built from a vector are equal and render the same.
    #[test]
    fn test_field_path() {
        let path = FieldPath::from_name("A").push("B").push("C");
        assert_eq!(path.to_string(), "$A.$B.$C");

        let fields = vec!["A", "B", "C"]
            .into_iter()
            .map(Field::from)
            .collect_vec();
        assert_eq!(path.path(), &fields);

        let vec_path = FieldPath::from(fields);
        assert_eq!(vec_path.to_string(), "$A.$B.$C");
        assert_eq!(path, vec_path);
    }

    /// The element selector renders as `[]` and the root path renders as an empty string.
    #[test]
    fn field_path_display_element_type() {
        let path = FieldPath::from_name("a")
            .push(Field::ElementType)
            .push("b");
        assert_eq!(path.to_string(), "$a.[].$b");
        assert_eq!(FieldPath::root().to_string(), "");
    }

    /// Every `From` conversion into `Field` produces the same named selector.
    #[test]
    fn field_conversions() {
        let name: Arc<str> = Arc::from("a");
        let expected = Field::Name(name.clone());

        assert_eq!(Field::from("a"), expected);
        assert_eq!(Field::from("a".to_string()), expected);
        assert_eq!(Field::from(&name), expected);
        assert_eq!(Field::from(name), expected);

        assert!(expected.is_named());
        assert!(!Field::ElementType.is_named());
    }

    /// Only the empty path is a root path.
    #[test]
    fn field_path_is_root() {
        assert!(FieldPath::root().is_root());
        assert!(FieldPath::root().path().is_empty());
        assert!(!FieldPath::from_name("a").is_root());
        assert!(!FieldPath::from(Field::ElementType).is_root());
    }

    /// `step_into` drops one leading selector at a time and yields `None` on the root path.
    #[test]
    fn field_path_step_into() {
        let path = FieldPath::from_name("a")
            .push(Field::ElementType)
            .push("b");

        let path = path.step_into().unwrap();
        assert_eq!(
            path,
            FieldPath::from(vec![Field::ElementType, Field::from("b")])
        );

        let path = path.step_into().unwrap();
        assert_eq!(path, FieldPath::from_name("b"));

        let path = path.step_into().unwrap();
        assert!(path.is_root());
        assert!(path.step_into().is_none());
    }

    /// `starts_with_field` compares only the first selector of the path.
    #[test]
    fn field_path_starts_with_field() {
        let path = FieldPath::from_name("a").push("b");
        assert!(path.starts_with_field(&Field::from("a")));
        assert!(!path.starts_with_field(&Field::from("b")));
    }

    /// Set membership is by exact path, not by prefix.
    #[test]
    fn field_path_set_contains() {
        let set = FieldPathSet::from_iter([
            FieldPath::root(),
            FieldPath::from_name("a").push("b"),
        ]);
        assert!(set.contains(&FieldPath::root()));
        assert!(set.contains(&FieldPath::from_name("a").push("b")));
        assert!(!set.contains(&FieldPath::from_name("a")));
        assert!(!set.contains(&FieldPath::from_name("b")));

        assert!(!FieldPathSet::default().contains(&FieldPath::root()));
    }

    #[test]
    fn nested_field_single_level() {
        let a_type = DType::Primitive(PType::I32, NonNullable);
        let dtype = DType::Struct(
            Arc::new(StructFields::from_iter([
                ("a", a_type.clone()),
                ("b", DType::Bool(Nullable)),
            ])),
            NonNullable,
        );
        let path = FieldPath::from_name("a");
        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
        assert!(path.exists(dtype));
    }

    #[test]
    fn nested_field_two_level() {
        let inner = DType::Struct(
            Arc::new(StructFields::from_iter([
                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
                ("inner_b", DType::Bool(Nullable)),
            ])),
            NonNullable,
        );

        let outer = DType::Struct(
            Arc::new(StructFields::from_iter([
                ("outer_a", DType::Bool(NonNullable)),
                ("outer_b", inner),
            ])),
            NonNullable,
        );

        let path = FieldPath::from_name("outer_b").push("inner_a");
        let dtype = path.resolve(outer.clone()).unwrap();

        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
        assert!(path.exists(outer));
    }

    #[test]
    fn nested_field_deep_nested() {
        // Shape under test: struct{a: struct{b: list(struct{c: f64?})?}}
        let level4 = DType::Struct(
            Arc::new(StructFields::from_iter([(
                "c",
                DType::Primitive(PType::F64, Nullable),
            )])),
            NonNullable,
        );

        let level3 = DType::List(Arc::new(level4), Nullable);

        let level2 = DType::Struct(
            Arc::new(StructFields::from_iter([("b", level3)])),
            NonNullable,
        );

        let level1 = DType::Struct(
            Arc::new(StructFields::from_iter([("a", level2)])),
            NonNullable,
        );

        // The element selector in the right position resolves all the way down.
        let path = FieldPath::from_name("a")
            .push("b")
            .push(Field::ElementType)
            .push("c");
        let dtype = path.resolve(level1.clone()).unwrap();

        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
        assert!(path.exists(level1.clone()));

        // The element selector in any other position must fail to resolve.
        let path = FieldPath::from_name("a")
            .push("b")
            .push("c")
            .push(Field::ElementType);
        assert!(path.resolve(level1.clone()).is_err());
        assert!(!path.exists(level1.clone()));

        let path = FieldPath::from_name("a")
            .push(Field::ElementType)
            .push("b")
            .push("c");
        assert!(path.resolve(level1.clone()).is_err());
        assert!(!path.exists(level1.clone()));

        let path = FieldPath::from_name(Field::ElementType)
            .push("a")
            .push("b")
            .push("c");
        assert!(path.resolve(level1.clone()).is_err());
        assert!(!path.exists(level1));
    }

    #[test]
    fn nested_field_not_found() {
        let dtype = DType::Struct(
            Arc::new(StructFields::from_iter([("a", DType::Bool(NonNullable))])),
            NonNullable,
        );
        let path = FieldPath::from_name("b");
        assert!(path.resolve(dtype.clone()).is_err());
        assert!(!path.exists(dtype.clone()));

        let path = FieldPath::from(Field::ElementType);
        assert!(path.resolve(dtype.clone()).is_err());
        assert!(!path.exists(dtype));
    }

    #[test]
    fn nested_field_non_struct_intermediate() {
        let dtype = DType::Struct(
            Arc::new(StructFields::from_iter([(
                "a",
                DType::Primitive(PType::I32, NonNullable),
            )])),
            NonNullable,
        );
        // "a" is a primitive, so neither a name nor an element selector can follow it.
        let path = FieldPath::from_name("a").push("b");
        assert!(path.resolve(dtype.clone()).is_err());
        assert!(!path.exists(dtype.clone()));

        let path = FieldPath::from_name("a").push(Field::ElementType);
        assert!(path.resolve(dtype.clone()).is_err());
        assert!(!path.exists(dtype));
    }
}