vortex_dtype/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Selectors for fields or elements in (possibly nested) `DType`s
5//!
6//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
7//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
8//! the struct field "parent" which is within the struct field "grandparent".
9
10use core::fmt;
11use std::fmt::{Display, Formatter};
12use std::sync::Arc;
13
14use itertools::Itertools;
15use vortex_utils::aliases::hash_set::HashSet;
16
17use crate::{DType, FieldName};
18
19/// Selects a nested type within either a struct or a list.
20#[derive(Clone, Debug, PartialEq, Eq, Hash)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22pub enum Field {
23    /// Address a field of a [`crate::DType::Struct`].
24    Name(FieldName),
25    /// Address the element type of a [`crate::DType::List`].
26    ElementType,
27}
28
29impl Field {
30    /// Retrieve a field name if it has one
31    pub fn as_name(&self) -> Option<&str> {
32        match self {
33            Field::Name(name) => Some(name),
34            Field::ElementType => None,
35        }
36    }
37
38    /// Returns true if the field is defined by Name
39    pub fn is_named(&self) -> bool {
40        matches!(self, Field::Name(_))
41    }
42}
43
44impl From<&str> for Field {
45    fn from(value: &str) -> Self {
46        Field::Name(value.into())
47    }
48}
49
50impl From<Arc<str>> for Field {
51    fn from(value: Arc<str>) -> Self {
52        Self::Name(value)
53    }
54}
55
56impl Display for Field {
57    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
58        match self {
59            Field::Name(name) => write!(f, "${name}"),
60            Field::ElementType => write!(f, "[]"),
61        }
62    }
63}
64
65/// A sequence of field selectors representing a path through zero or more layers of `DType`.
66///
67/// # Examples
68///
69/// The empty path references the root:
70///
71/// ```
72/// use vortex_dtype::*;
73///
74/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
75/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
76/// ```
77///
78// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
79//  with a positional index to allow cheap step_into.
80#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
81#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
82pub struct FieldPath(Vec<Field>);
83
84impl FieldPath {
85    /// The selector for the root (i.e., the top-level struct itself)
86    pub fn root() -> Self {
87        Self::default()
88    }
89
90    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
91    pub fn from_name<F: Into<Field>>(name: F) -> Self {
92        Self(vec![name.into()])
93    }
94
95    /// Returns the sequence of field selectors that make up this path
96    pub fn parts(&self) -> &[Field] {
97        &self.0
98    }
99
100    /// Returns whether this path is a root path.
101    pub fn is_root(&self) -> bool {
102        self.0.is_empty()
103    }
104
105    /// Pushes a new field selector to the end of this path
106    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
107        self.0.push(field.into());
108        self
109    }
110
111    /// Whether the path starts with the given field name
112    /// TODO(joe): handle asserts better.
113    pub fn starts_with_field(&self, field: &Field) -> bool {
114        assert!(matches!(field, Field::Name(_)));
115        let first = self.0.first();
116        assert!(matches!(first, Some(Field::Name(_))));
117        first.is_some_and(|f| f == field)
118    }
119
120    /// Steps into the next field in the path
121    pub fn step_into(mut self) -> Option<Self> {
122        if self.0.is_empty() {
123            return None;
124        }
125        self.0 = self.0.into_iter().skip(1).collect();
126        Some(self)
127    }
128
129    /// The dtype, within the given type, to which this field path refers.
130    ///
131    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
132    /// literal nullability of the child.
133    ///
134    /// # Examples
135    ///
136    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
137    ///
138    /// ```
139    /// use std::sync::Arc;
140    ///
141    /// use vortex_dtype::*;
142    /// use vortex_dtype::Nullability::*;
143    ///
144    /// let dtype = DType::Struct(
145    ///     StructFields::from_iter([(
146    ///         "a",
147    ///         DType::List(
148    ///             Arc::new(DType::Struct(
149    ///                 StructFields::from_iter([(
150    ///                     "b",
151    ///                     DType::Primitive(PType::U32, NonNullable),
152    ///                 )]),
153    ///                 NonNullable,
154    ///             )),
155    ///             Nullable,
156    ///         ),
157    ///     )]),
158    ///     NonNullable,
159    /// );
160    ///
161    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
162    /// let resolved = path.resolve(dtype).unwrap();
163    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
164    /// ```
165    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
166        for field in &self.0 {
167            dtype = match (dtype, field) {
168                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
169                (DType::List(element_dtype, _), Field::ElementType) => {
170                    element_dtype.as_ref().clone()
171                }
172                (..) => return None,
173            }
174        }
175
176        Some(dtype)
177    }
178
179    /// Does the field referenced by the field path exist in the given dtype?
180    pub fn exists_in(&self, dtype: DType) -> bool {
181        // Indexing a struct type always allocates anyway.
182        self.resolve(dtype).is_some()
183    }
184}
185
186impl FromIterator<Field> for FieldPath {
187    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
188        FieldPath(iter.into_iter().collect())
189    }
190}
191
192impl From<Field> for FieldPath {
193    fn from(value: Field) -> Self {
194        FieldPath(vec![value])
195    }
196}
197
198impl From<Vec<Field>> for FieldPath {
199    fn from(value: Vec<Field>) -> Self {
200        FieldPath(value)
201    }
202}
203
204impl Display for FieldPath {
205    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
206        Display::fmt(&self.0.iter().format("."), f)
207    }
208}
209
210#[derive(Default, Clone, Debug)]
211/// Contains a set of field paths, and can answer an efficient field path contains queries.
212pub struct FieldPathSet {
213    /// While this is currently a set wrapper it can be replaced with a trie.
214    // TODO(joe): this can be replaced with a `FieldPath` trie
215    set: HashSet<FieldPath>,
216}
217
218impl FieldPathSet {
219    /// Checks if a set contains a field path
220    pub fn contains(&self, path: &FieldPath) -> bool {
221        self.set.contains(path)
222    }
223}
224
225impl FromIterator<FieldPath> for FieldPathSet {
226    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
227        let set = HashSet::from_iter(iter);
228        Self { set }
229    }
230}
231
232#[cfg(test)]
233mod tests {
234    use super::*;
235    use crate::Nullability::*;
236    use crate::{DType, PType, StructFields};
237
238    #[test]
239    fn test_field_path() {
240        let path = FieldPath::from_name("A").push("B").push("C");
241        assert_eq!(path.to_string(), "$A.$B.$C");
242
243        let fields = vec!["A", "B", "C"]
244            .into_iter()
245            .map(Field::from)
246            .collect_vec();
247        assert_eq!(path.parts(), &fields);
248
249        let vec_path = FieldPath::from(fields);
250        assert_eq!(vec_path.to_string(), "$A.$B.$C");
251        assert_eq!(path, vec_path);
252    }
253
254    #[test]
255    fn nested_field_single_level() {
256        let a_type = DType::Primitive(PType::I32, NonNullable);
257        let dtype = DType::struct_(
258            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
259            NonNullable,
260        );
261        let path = FieldPath::from_name("a");
262        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
263        assert!(path.exists_in(dtype));
264    }
265
266    #[test]
267    fn nested_field_two_level() {
268        let inner = DType::struct_(
269            [
270                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
271                ("inner_b", DType::Bool(Nullable)),
272            ],
273            NonNullable,
274        );
275
276        let outer = DType::Struct(
277            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
278            NonNullable,
279        );
280
281        let path = FieldPath::from_name("outer_b").push("inner_a");
282        let dtype = path.resolve(outer.clone()).unwrap();
283
284        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
285        assert!(path.exists_in(outer));
286    }
287
288    #[test]
289    fn nested_field_deep_nested() {
290        let level1 = DType::struct_(
291            [(
292                "a",
293                DType::struct_(
294                    [(
295                        "b",
296                        DType::list(
297                            DType::struct_(
298                                [("c", DType::Primitive(PType::F64, Nullable))],
299                                NonNullable,
300                            ),
301                            Nullable,
302                        ),
303                    )],
304                    NonNullable,
305                ),
306            )],
307            NonNullable,
308        );
309
310        let path = FieldPath::from_name("a")
311            .push("b")
312            .push(Field::ElementType)
313            .push("c");
314        let dtype = path.resolve(level1.clone()).unwrap();
315
316        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
317        assert!(path.exists_in(level1.clone()));
318
319        let path = FieldPath::from_name("a")
320            .push("b")
321            .push("c")
322            .push(Field::ElementType);
323        assert!(path.resolve(level1.clone()).is_none());
324        assert!(!path.exists_in(level1.clone()));
325
326        let path = FieldPath::from_name("a")
327            .push(Field::ElementType)
328            .push("b")
329            .push("c");
330        assert!(path.resolve(level1.clone()).is_none());
331        assert!(!path.exists_in(level1.clone()));
332
333        let path = FieldPath::from_name(Field::ElementType)
334            .push("a")
335            .push("b")
336            .push("c");
337        assert!(path.resolve(level1.clone()).is_none());
338        assert!(!path.exists_in(level1));
339    }
340
341    #[test]
342    fn nested_field_not_found() {
343        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
344        let path = FieldPath::from_name("b");
345        assert!(path.resolve(dtype.clone()).is_none());
346        assert!(!path.exists_in(dtype.clone()));
347
348        let path = FieldPath::from(Field::ElementType);
349        assert!(path.resolve(dtype.clone()).is_none());
350        assert!(!path.exists_in(dtype));
351    }
352
353    #[test]
354    fn nested_field_non_struct_intermediate() {
355        let dtype = DType::struct_(
356            [("a", DType::Primitive(PType::I32, NonNullable))],
357            NonNullable,
358        );
359        let path = FieldPath::from_name("a").push("b");
360        assert!(path.resolve(dtype.clone()).is_none());
361        assert!(!path.exists_in(dtype.clone()));
362
363        let path = FieldPath::from_name("a").push(Field::ElementType);
364        assert!(path.resolve(dtype.clone()).is_none());
365        assert!(!path.exists_in(dtype));
366    }
367}