vortex_dtype/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Selectors for fields or elements in (possibly nested) `DType`s
5//!
6//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
7//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
8//! the struct field "parent" which is within the struct field "grandparent".
9
10use core::fmt;
11use std::fmt::{Display, Formatter};
12use std::sync::Arc;
13
14use itertools::Itertools;
15use vortex_utils::aliases::hash_set::HashSet;
16
17use crate::{DType, FieldName};
18
19/// Selects a nested type within either a struct or a list.
20#[derive(Clone, Debug, PartialEq, Eq, Hash)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22pub enum Field {
23    /// Address a field of a [`crate::DType::Struct`].
24    Name(FieldName),
25    // TODO(connor): Actually make use of this variant.
26    /// Address the element type of a [`crate::DType::List`] or [`crate::DType::FixedSizeList`].
27    ElementType,
28}
29
30impl Field {
31    /// Retrieve a field name if it has one
32    pub fn as_name(&self) -> Option<&str> {
33        match self {
34            Field::Name(name) => Some(name),
35            Field::ElementType => None,
36        }
37    }
38
39    /// Returns true if the field is defined by Name
40    pub fn is_named(&self) -> bool {
41        matches!(self, Field::Name(_))
42    }
43}
44
45impl From<&str> for Field {
46    fn from(value: &str) -> Self {
47        Field::Name(value.into())
48    }
49}
50
51impl From<Arc<str>> for Field {
52    fn from(value: Arc<str>) -> Self {
53        Self::Name(value)
54    }
55}
56
57impl Display for Field {
58    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
59        match self {
60            Field::Name(name) => write!(f, "${name}"),
61            Field::ElementType => write!(f, "[]"),
62        }
63    }
64}
65
66/// A sequence of field selectors representing a path through zero or more layers of `DType`.
67///
68/// # Examples
69///
70/// The empty path references the root:
71///
72/// ```
73/// use vortex_dtype::*;
74///
75/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
76/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
77/// ```
78///
79// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
80//  with a positional index to allow cheap step_into.
81#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
82#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
83pub struct FieldPath(Vec<Field>);
84
85impl FieldPath {
86    /// The selector for the root (i.e., the top-level struct itself)
87    pub fn root() -> Self {
88        Self::default()
89    }
90
91    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
92    pub fn from_name<F: Into<Field>>(name: F) -> Self {
93        Self(vec![name.into()])
94    }
95
96    /// Returns the sequence of field selectors that make up this path
97    pub fn parts(&self) -> &[Field] {
98        &self.0
99    }
100
101    /// Returns whether this path is a root path.
102    pub fn is_root(&self) -> bool {
103        self.0.is_empty()
104    }
105
106    /// Pushes a new field selector to the end of this path
107    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
108        self.0.push(field.into());
109        self
110    }
111
112    /// Whether the path starts with the given field name
113    /// TODO(joe): handle asserts better.
114    pub fn starts_with_field(&self, field: &Field) -> bool {
115        assert!(matches!(field, Field::Name(_)));
116        let first = self.0.first();
117        assert!(matches!(first, Some(Field::Name(_))));
118        first.is_some_and(|f| f == field)
119    }
120
121    /// Steps into the next field in the path
122    pub fn step_into(mut self) -> Option<Self> {
123        if self.0.is_empty() {
124            return None;
125        }
126        self.0 = self.0.into_iter().skip(1).collect();
127        Some(self)
128    }
129
130    /// The dtype, within the given type, to which this field path refers.
131    ///
132    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
133    /// literal nullability of the child.
134    ///
135    /// # Examples
136    ///
137    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
138    ///
139    /// ```
140    /// use std::sync::Arc;
141    ///
142    /// use vortex_dtype::*;
143    /// use vortex_dtype::Nullability::*;
144    ///
145    /// let dtype = DType::Struct(
146    ///     StructFields::from_iter([(
147    ///         "a",
148    ///         DType::List(
149    ///             Arc::new(DType::Struct(
150    ///                 StructFields::from_iter([(
151    ///                     "b",
152    ///                     DType::Primitive(PType::U32, NonNullable),
153    ///                 )]),
154    ///                 NonNullable,
155    ///             )),
156    ///             Nullable,
157    ///         ),
158    ///     )]),
159    ///     NonNullable,
160    /// );
161    ///
162    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
163    /// let resolved = path.resolve(dtype).unwrap();
164    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
165    /// ```
166    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
167        for field in &self.0 {
168            dtype = match (dtype, field) {
169                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
170                (DType::List(element_dtype, _), Field::ElementType) => {
171                    element_dtype.as_ref().clone()
172                }
173                (..) => return None,
174            }
175        }
176
177        Some(dtype)
178    }
179
180    /// Does the field referenced by the field path exist in the given dtype?
181    pub fn exists_in(&self, dtype: DType) -> bool {
182        // Indexing a struct type always allocates anyway.
183        self.resolve(dtype).is_some()
184    }
185}
186
187impl FromIterator<Field> for FieldPath {
188    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
189        FieldPath(iter.into_iter().collect())
190    }
191}
192
193impl From<Field> for FieldPath {
194    fn from(value: Field) -> Self {
195        FieldPath(vec![value])
196    }
197}
198
199impl From<Vec<Field>> for FieldPath {
200    fn from(value: Vec<Field>) -> Self {
201        FieldPath(value)
202    }
203}
204
205impl Display for FieldPath {
206    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
207        Display::fmt(&self.0.iter().format("."), f)
208    }
209}
210
211#[derive(Default, Clone, Debug)]
212/// Contains a set of field paths, and can answer an efficient field path contains queries.
213pub struct FieldPathSet {
214    /// While this is currently a set wrapper it can be replaced with a trie.
215    // TODO(joe): this can be replaced with a `FieldPath` trie
216    set: HashSet<FieldPath>,
217}
218
219impl FieldPathSet {
220    /// Checks if a set contains a field path
221    pub fn contains(&self, path: &FieldPath) -> bool {
222        self.set.contains(path)
223    }
224}
225
226impl FromIterator<FieldPath> for FieldPathSet {
227    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
228        let set = HashSet::from_iter(iter);
229        Self { set }
230    }
231}
232
233#[cfg(test)]
234mod tests {
235    use super::*;
236    use crate::Nullability::*;
237    use crate::{DType, PType, StructFields};
238
239    #[test]
240    fn test_field_path() {
241        let path = FieldPath::from_name("A").push("B").push("C");
242        assert_eq!(path.to_string(), "$A.$B.$C");
243
244        let fields = vec!["A", "B", "C"]
245            .into_iter()
246            .map(Field::from)
247            .collect_vec();
248        assert_eq!(path.parts(), &fields);
249
250        let vec_path = FieldPath::from(fields);
251        assert_eq!(vec_path.to_string(), "$A.$B.$C");
252        assert_eq!(path, vec_path);
253    }
254
255    #[test]
256    fn nested_field_single_level() {
257        let a_type = DType::Primitive(PType::I32, NonNullable);
258        let dtype = DType::struct_(
259            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
260            NonNullable,
261        );
262        let path = FieldPath::from_name("a");
263        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
264        assert!(path.exists_in(dtype));
265    }
266
267    #[test]
268    fn nested_field_two_level() {
269        let inner = DType::struct_(
270            [
271                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
272                ("inner_b", DType::Bool(Nullable)),
273            ],
274            NonNullable,
275        );
276
277        let outer = DType::Struct(
278            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
279            NonNullable,
280        );
281
282        let path = FieldPath::from_name("outer_b").push("inner_a");
283        let dtype = path.resolve(outer.clone()).unwrap();
284
285        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
286        assert!(path.exists_in(outer));
287    }
288
289    #[test]
290    fn nested_field_deep_nested() {
291        let level1 = DType::struct_(
292            [(
293                "a",
294                DType::struct_(
295                    [(
296                        "b",
297                        DType::list(
298                            DType::struct_(
299                                [("c", DType::Primitive(PType::F64, Nullable))],
300                                NonNullable,
301                            ),
302                            Nullable,
303                        ),
304                    )],
305                    NonNullable,
306                ),
307            )],
308            NonNullable,
309        );
310
311        let path = FieldPath::from_name("a")
312            .push("b")
313            .push(Field::ElementType)
314            .push("c");
315        let dtype = path.resolve(level1.clone()).unwrap();
316
317        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
318        assert!(path.exists_in(level1.clone()));
319
320        let path = FieldPath::from_name("a")
321            .push("b")
322            .push("c")
323            .push(Field::ElementType);
324        assert!(path.resolve(level1.clone()).is_none());
325        assert!(!path.exists_in(level1.clone()));
326
327        let path = FieldPath::from_name("a")
328            .push(Field::ElementType)
329            .push("b")
330            .push("c");
331        assert!(path.resolve(level1.clone()).is_none());
332        assert!(!path.exists_in(level1.clone()));
333
334        let path = FieldPath::from_name(Field::ElementType)
335            .push("a")
336            .push("b")
337            .push("c");
338        assert!(path.resolve(level1.clone()).is_none());
339        assert!(!path.exists_in(level1));
340    }
341
342    #[test]
343    fn nested_field_not_found() {
344        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
345        let path = FieldPath::from_name("b");
346        assert!(path.resolve(dtype.clone()).is_none());
347        assert!(!path.exists_in(dtype.clone()));
348
349        let path = FieldPath::from(Field::ElementType);
350        assert!(path.resolve(dtype.clone()).is_none());
351        assert!(!path.exists_in(dtype));
352    }
353
354    #[test]
355    fn nested_field_non_struct_intermediate() {
356        let dtype = DType::struct_(
357            [("a", DType::Primitive(PType::I32, NonNullable))],
358            NonNullable,
359        );
360        let path = FieldPath::from_name("a").push("b");
361        assert!(path.resolve(dtype.clone()).is_none());
362        assert!(!path.exists_in(dtype.clone()));
363
364        let path = FieldPath::from_name("a").push(Field::ElementType);
365        assert!(path.resolve(dtype.clone()).is_none());
366        assert!(!path.exists_in(dtype));
367    }
368}