vortex_dtype/
field.rs

1//! Selectors for fields or elements in (possibly nested) `DType`s
2//!
3//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
4//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
5//! the struct field "parent" which is within the struct field "grandparent".
6
7use core::fmt;
8use std::fmt::{Display, Formatter};
9use std::sync::Arc;
10
11use itertools::Itertools;
12use vortex_utils::aliases::hash_set::HashSet;
13
14use crate::{DType, FieldName};
15
16/// Selects a nested type within either a struct or a list.
17#[derive(Clone, Debug, PartialEq, Eq, Hash)]
18#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
19pub enum Field {
20    /// Address a field of a [`crate::DType::Struct`].
21    Name(FieldName),
22    /// Address the element type of a [`crate::DType::List`].
23    ElementType,
24}
25
26impl Field {
27    /// Retrieve a field name if it has one
28    pub fn as_name(&self) -> Option<&str> {
29        match self {
30            Field::Name(name) => Some(name),
31            Field::ElementType => None,
32        }
33    }
34
35    /// Returns true if the field is defined by Name
36    pub fn is_named(&self) -> bool {
37        matches!(self, Field::Name(_))
38    }
39}
40
41impl From<&str> for Field {
42    fn from(value: &str) -> Self {
43        Field::Name(value.into())
44    }
45}
46
47impl From<Arc<str>> for Field {
48    fn from(value: Arc<str>) -> Self {
49        Self::Name(value)
50    }
51}
52
53impl Display for Field {
54    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
55        match self {
56            Field::Name(name) => write!(f, "${name}"),
57            Field::ElementType => write!(f, "[]"),
58        }
59    }
60}
61
62/// A sequence of field selectors representing a path through zero or more layers of `DType`.
63///
64/// # Examples
65///
66/// The empty path references the root:
67///
68/// ```
69/// use vortex_dtype::*;
70///
71/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
72/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
73/// ```
74///
75// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
76//  with a positional index to allow cheap step_into.
77#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
78#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
79pub struct FieldPath(Vec<Field>);
80
81impl FieldPath {
82    /// The selector for the root (i.e., the top-level struct itself)
83    pub fn root() -> Self {
84        Self::default()
85    }
86
87    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
88    pub fn from_name<F: Into<Field>>(name: F) -> Self {
89        Self(vec![name.into()])
90    }
91
92    /// Returns the sequence of field selectors that make up this path
93    pub fn path(&self) -> &[Field] {
94        &self.0
95    }
96
97    /// Returns whether this path is a root path.
98    pub fn is_root(&self) -> bool {
99        self.0.is_empty()
100    }
101
102    /// Pushes a new field selector to the end of this path
103    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
104        self.0.push(field.into());
105        self
106    }
107
108    /// Whether the path starts with the given field name
109    /// TODO(joe): handle asserts better.
110    pub fn starts_with_field(&self, field: &Field) -> bool {
111        assert!(matches!(field, Field::Name(_)));
112        let first = self.0.first();
113        assert!(matches!(first, Some(Field::Name(_))));
114        first.is_some_and(|f| f == field)
115    }
116
117    /// Steps into the next field in the path
118    pub fn step_into(mut self) -> Option<Self> {
119        if self.0.is_empty() {
120            return None;
121        }
122        self.0 = self.0.into_iter().skip(1).collect();
123        Some(self)
124    }
125
126    /// The dtype, within the given type, to which this field path refers.
127    ///
128    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
129    /// literal nullability of the child.
130    ///
131    /// # Examples
132    ///
133    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
134    ///
135    /// ```
136    /// use std::sync::Arc;
137    ///
138    /// use vortex_dtype::*;
139    /// use vortex_dtype::Nullability::*;
140    ///
141    /// let dtype = DType::Struct(
142    ///     StructFields::from_iter([(
143    ///         "a",
144    ///         DType::List(
145    ///             Arc::new(DType::Struct(
146    ///                 StructFields::from_iter([(
147    ///                     "b",
148    ///                     DType::Primitive(PType::U32, NonNullable),
149    ///                 )]),
150    ///                 NonNullable,
151    ///             )),
152    ///             Nullable,
153    ///         ),
154    ///     )]),
155    ///     NonNullable,
156    /// );
157    ///
158    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
159    /// let resolved = path.resolve(dtype).unwrap();
160    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
161    /// ```
162    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
163        for field in &self.0 {
164            dtype = match (dtype, field) {
165                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
166                (DType::List(element_dtype, _), Field::ElementType) => {
167                    element_dtype.as_ref().clone()
168                }
169                (..) => return None,
170            }
171        }
172
173        Some(dtype)
174    }
175
176    /// Does the field referenced by the field path exist in the given dtype?
177    pub fn exists(&self, dtype: DType) -> bool {
178        // Indexing a struct type always allocates anyway.
179        self.resolve(dtype).is_some()
180    }
181}
182
183impl FromIterator<Field> for FieldPath {
184    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
185        FieldPath(iter.into_iter().collect())
186    }
187}
188
189impl From<Field> for FieldPath {
190    fn from(value: Field) -> Self {
191        FieldPath(vec![value])
192    }
193}
194
195impl From<Vec<Field>> for FieldPath {
196    fn from(value: Vec<Field>) -> Self {
197        FieldPath(value)
198    }
199}
200
201impl Display for FieldPath {
202    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
203        Display::fmt(&self.0.iter().format("."), f)
204    }
205}
206
207#[derive(Default, Clone, Debug)]
208/// Contains a set of field paths, and can answer an efficient field path contains queries.
209pub struct FieldPathSet {
210    /// While this is currently a set wrapper it can be replaced with a trie.
211    // TODO(joe): this can be replaced with a `FieldPath` trie
212    set: HashSet<FieldPath>,
213}
214
215impl FieldPathSet {
216    /// Checks if a set contains a field path
217    pub fn contains(&self, path: &FieldPath) -> bool {
218        self.set.contains(path)
219    }
220}
221
222impl FromIterator<FieldPath> for FieldPathSet {
223    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
224        let set = HashSet::from_iter(iter);
225        Self { set }
226    }
227}
228
229#[cfg(test)]
230mod tests {
231    use super::*;
232    use crate::Nullability::*;
233    use crate::{DType, PType, StructFields};
234
235    #[test]
236    fn test_field_path() {
237        let path = FieldPath::from_name("A").push("B").push("C");
238        assert_eq!(path.to_string(), "$A.$B.$C");
239
240        let fields = vec!["A", "B", "C"]
241            .into_iter()
242            .map(Field::from)
243            .collect_vec();
244        assert_eq!(path.path(), &fields);
245
246        let vec_path = FieldPath::from(fields);
247        assert_eq!(vec_path.to_string(), "$A.$B.$C");
248        assert_eq!(path, vec_path);
249    }
250
251    #[test]
252    fn nested_field_single_level() {
253        let a_type = DType::Primitive(PType::I32, NonNullable);
254        let dtype = DType::struct_(
255            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
256            NonNullable,
257        );
258        let path = FieldPath::from_name("a");
259        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
260        assert!(path.exists(dtype));
261    }
262
263    #[test]
264    fn nested_field_two_level() {
265        let inner = DType::struct_(
266            [
267                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
268                ("inner_b", DType::Bool(Nullable)),
269            ],
270            NonNullable,
271        );
272
273        let outer = DType::Struct(
274            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
275            NonNullable,
276        );
277
278        let path = FieldPath::from_name("outer_b").push("inner_a");
279        let dtype = path.resolve(outer.clone()).unwrap();
280
281        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
282        assert!(path.exists(outer));
283    }
284
285    #[test]
286    fn nested_field_deep_nested() {
287        let level1 = DType::struct_(
288            [(
289                "a",
290                DType::struct_(
291                    [(
292                        "b",
293                        DType::list(
294                            DType::struct_(
295                                [("c", DType::Primitive(PType::F64, Nullable))],
296                                NonNullable,
297                            ),
298                            Nullable,
299                        ),
300                    )],
301                    NonNullable,
302                ),
303            )],
304            NonNullable,
305        );
306
307        let path = FieldPath::from_name("a")
308            .push("b")
309            .push(Field::ElementType)
310            .push("c");
311        let dtype = path.resolve(level1.clone()).unwrap();
312
313        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
314        assert!(path.exists(level1.clone()));
315
316        let path = FieldPath::from_name("a")
317            .push("b")
318            .push("c")
319            .push(Field::ElementType);
320        assert!(path.resolve(level1.clone()).is_none());
321        assert!(!path.exists(level1.clone()));
322
323        let path = FieldPath::from_name("a")
324            .push(Field::ElementType)
325            .push("b")
326            .push("c");
327        assert!(path.resolve(level1.clone()).is_none());
328        assert!(!path.exists(level1.clone()));
329
330        let path = FieldPath::from_name(Field::ElementType)
331            .push("a")
332            .push("b")
333            .push("c");
334        assert!(path.resolve(level1.clone()).is_none());
335        assert!(!path.exists(level1));
336    }
337
338    #[test]
339    fn nested_field_not_found() {
340        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
341        let path = FieldPath::from_name("b");
342        assert!(path.resolve(dtype.clone()).is_none());
343        assert!(!path.exists(dtype.clone()));
344
345        let path = FieldPath::from(Field::ElementType);
346        assert!(path.resolve(dtype.clone()).is_none());
347        assert!(!path.exists(dtype));
348    }
349
350    #[test]
351    fn nested_field_non_struct_intermediate() {
352        let dtype = DType::struct_(
353            [("a", DType::Primitive(PType::I32, NonNullable))],
354            NonNullable,
355        );
356        let path = FieldPath::from_name("a").push("b");
357        assert!(path.resolve(dtype.clone()).is_none());
358        assert!(!path.exists(dtype.clone()));
359
360        let path = FieldPath::from_name("a").push(Field::ElementType);
361        assert!(path.resolve(dtype.clone()).is_none());
362        assert!(!path.exists(dtype));
363    }
364}