vortex_dtype/
field.rs

1//! Selectors for fields or elements in (possibly nested) `DType`s
2//!
3//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
4//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
5//! the struct field "parent" which is within the struct field "grandparent".
6
7use core::fmt;
8use std::fmt::{Display, Formatter};
9use std::sync::Arc;
10
11use itertools::Itertools;
12use vortex_error::{VortexResult, vortex_bail};
13
14use crate::DType;
15
/// A single-layer selector into a `DType`: either a named struct field or the element type of a
/// list.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Field {
    /// Selects the field with this name inside a [`crate::DType::Struct`].
    Name(Arc<str>),
    /// Selects the element type of a [`crate::DType::List`].
    ElementType,
}
25
26impl From<&str> for Field {
27    fn from(value: &str) -> Self {
28        Field::Name(value.into())
29    }
30}
31
32impl From<Arc<str>> for Field {
33    fn from(value: Arc<str>) -> Self {
34        Self::Name(value)
35    }
36}
37
38impl From<&Arc<str>> for Field {
39    fn from(value: &Arc<str>) -> Self {
40        Self::Name(value.clone())
41    }
42}
43
44impl From<String> for Field {
45    fn from(value: String) -> Self {
46        Field::Name(value.into())
47    }
48}
49
50impl Display for Field {
51    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
52        match self {
53            Field::Name(name) => write!(f, "${name}"),
54            Field::ElementType => write!(f, "[]"),
55        }
56    }
57}
58
59impl Field {
60    /// Returns true if the field is defined by Name
61    pub fn is_named(&self) -> bool {
62        matches!(self, Field::Name(_))
63    }
64}
65
66/// A sequence of field selectors representing a path through zero or more layers of `DType`.
67///
68/// # Examples
69///
70/// The empty path references the root:
71///
72/// ```
73/// use vortex_dtype::*;
74///
75/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
76/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
77/// ```
78///
79// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
80//  with a positional index to allow cheap step_into.
81#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
82#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
83pub struct FieldPath(Vec<Field>);
84
85impl FieldPath {
86    /// The selector for the root (i.e., the top-level struct itself)
87    pub fn root() -> Self {
88        Self::default()
89    }
90
91    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
92    pub fn from_name<F: Into<Field>>(name: F) -> Self {
93        Self(vec![name.into()])
94    }
95
96    /// Returns the sequence of field selectors that make up this path
97    pub fn path(&self) -> &[Field] {
98        &self.0
99    }
100
101    /// Returns whether this path is a root path.
102    pub fn is_root(&self) -> bool {
103        self.0.is_empty()
104    }
105
106    /// Pushes a new field selector to the end of this path
107    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
108        self.0.push(field.into());
109        self
110    }
111
112    /// Whether the path starts with the given field name
113    /// TODO(joe): handle asserts better.
114    pub fn starts_with_field(&self, field: &Field) -> bool {
115        assert!(matches!(field, Field::Name(_)));
116        let first = self.0.first();
117        assert!(matches!(first, Some(Field::Name(_))));
118        first.is_some_and(|f| f == field)
119    }
120
121    /// Steps into the next field in the path
122    pub fn step_into(mut self) -> Option<Self> {
123        if self.0.is_empty() {
124            return None;
125        }
126        self.0 = self.0.iter().skip(1).cloned().collect();
127        Some(self)
128    }
129
130    /// The dtype, within the given type, to which this field path refers.
131    ///
132    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
133    /// literal nullability of the child.
134    ///
135    /// # Examples
136    ///
137    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
138    ///
139    /// ```
140    /// use std::sync::Arc;
141    ///
142    /// use vortex_dtype::*;
143    /// use vortex_dtype::Nullability::*;
144    ///
145    /// let dtype = DType::Struct(
146    ///     Arc::new(StructFields::from_iter([(
147    ///         "a",
148    ///         DType::List(
149    ///             Arc::new(DType::Struct(
150    ///                 Arc::new(StructFields::from_iter([(
151    ///                     "b",
152    ///                     DType::Primitive(PType::U32, NonNullable),
153    ///                 )])),
154    ///                 NonNullable,
155    ///             )),
156    ///             Nullable,
157    ///         ),
158    ///     )])),
159    ///     NonNullable,
160    /// );
161    ///
162    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
163    /// let resolved = path.resolve(dtype).unwrap();
164    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
165    /// ```
166    pub fn resolve(&self, mut dtype: DType) -> VortexResult<DType> {
167        for field in &self.0 {
168            dtype = match (dtype, field) {
169                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
170                (DType::List(element_dtype, _), Field::ElementType) => DType::clone(&element_dtype),
171                (other, f) => {
172                    vortex_bail!("FieldPath: invalid index {:?} for DType {:?}", f, other)
173                }
174            }
175        }
176
177        Ok(dtype)
178    }
179
180    /// Does the field referenced by the field path exist in the given dtype?
181    pub fn exists(&self, dtype: DType) -> bool {
182        // Indexing a struct type always allocates anyway.
183        self.resolve(dtype).is_ok()
184    }
185}
186
187impl FromIterator<Field> for FieldPath {
188    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
189        FieldPath(iter.into_iter().collect())
190    }
191}
192
193impl From<Field> for FieldPath {
194    fn from(value: Field) -> Self {
195        FieldPath(vec![value])
196    }
197}
198
199impl From<Vec<Field>> for FieldPath {
200    fn from(value: Vec<Field>) -> Self {
201        FieldPath(value)
202    }
203}
204
205impl Display for FieldPath {
206    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
207        Display::fmt(&self.0.iter().format("."), f)
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Nullability::*;
    use crate::{DType, PType, StructFields};

    /// Wraps the given named children in a non-nullable struct dtype.
    fn non_nullable_struct<const N: usize>(children: [(&'static str, DType); N]) -> DType {
        DType::Struct(Arc::new(StructFields::from_iter(children)), NonNullable)
    }

    /// Asserts that `path` resolves against `dtype` to exactly `expected`, and that `exists`
    /// agrees.
    fn assert_resolves_to(path: &FieldPath, dtype: &DType, expected: &DType) {
        let resolved = path.resolve(dtype.clone()).unwrap();
        assert_eq!(&resolved, expected);
        assert!(path.exists(dtype.clone()));
    }

    /// Asserts that `path` cannot be resolved against `dtype`, and that `exists` agrees.
    fn assert_unresolvable(path: &FieldPath, dtype: &DType) {
        assert!(path.resolve(dtype.clone()).is_err());
        assert!(!path.exists(dtype.clone()));
    }

    #[test]
    fn test_field_path() {
        let expected_rendering = "$A.$B.$C";

        let pushed = FieldPath::from_name("A").push("B").push("C");
        assert_eq!(pushed.to_string(), expected_rendering);

        let selectors = ["A", "B", "C"].map(Field::from).to_vec();
        assert_eq!(pushed.path(), selectors.as_slice());

        let from_vec = FieldPath::from(selectors);
        assert_eq!(from_vec.to_string(), expected_rendering);
        assert_eq!(pushed, from_vec);
    }

    #[test]
    fn nested_field_single_level() {
        let a_type = DType::Primitive(PType::I32, NonNullable);
        let root = non_nullable_struct([("a", a_type.clone()), ("b", DType::Bool(Nullable))]);

        assert_resolves_to(&FieldPath::from_name("a"), &root, &a_type);
    }

    #[test]
    fn nested_field_two_level() {
        let inner = non_nullable_struct([
            ("inner_a", DType::Primitive(PType::U8, NonNullable)),
            ("inner_b", DType::Bool(Nullable)),
        ]);
        let outer = non_nullable_struct([
            ("outer_a", DType::Bool(NonNullable)),
            ("outer_b", inner),
        ]);

        let path = FieldPath::from_name("outer_b").push("inner_a");
        assert_resolves_to(&path, &outer, &DType::Primitive(PType::U8, NonNullable));
    }

    #[test]
    fn nested_field_deep_nested() {
        // struct{a: struct{b: list(struct{c: f64?})?}}
        let leaf_struct = non_nullable_struct([("c", DType::Primitive(PType::F64, Nullable))]);
        let list_of_leaf = DType::List(Arc::new(leaf_struct), Nullable);
        let middle = non_nullable_struct([("b", list_of_leaf)]);
        let root = non_nullable_struct([("a", middle)]);

        // The only valid ordering: a -> b -> [] -> c.
        let valid = FieldPath::from_name("a")
            .push("b")
            .push(Field::ElementType)
            .push("c");
        assert_resolves_to(&valid, &root, &DType::Primitive(PType::F64, Nullable));

        // Element selector placed after "c" instead of before it.
        let element_last = FieldPath::from_name("a")
            .push("b")
            .push("c")
            .push(Field::ElementType);
        assert_unresolvable(&element_last, &root);

        // Element selector applied to the struct "a".
        let element_second = FieldPath::from_name("a")
            .push(Field::ElementType)
            .push("b")
            .push("c");
        assert_unresolvable(&element_second, &root);

        // Element selector applied to the root struct.
        let element_first = FieldPath::from_name(Field::ElementType)
            .push("a")
            .push("b")
            .push("c");
        assert_unresolvable(&element_first, &root);
    }

    #[test]
    fn nested_field_not_found() {
        let root = non_nullable_struct([("a", DType::Bool(NonNullable))]);

        assert_unresolvable(&FieldPath::from_name("b"), &root);
        assert_unresolvable(&FieldPath::from(Field::ElementType), &root);
    }

    #[test]
    fn nested_field_non_struct_intermediate() {
        let root = non_nullable_struct([("a", DType::Primitive(PType::I32, NonNullable))]);

        assert_unresolvable(&FieldPath::from_name("a").push("b"), &root);
        assert_unresolvable(&FieldPath::from_name("a").push(Field::ElementType), &root);
    }
}