vortex_dtype/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Selectors for fields or elements in (possibly nested) `DType`s
5//!
6//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
7//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
8//! the struct field "parent" which is within the struct field "grandparent".
9
10use core::fmt;
11use std::fmt::Display;
12use std::fmt::Formatter;
13use std::sync::Arc;
14
15use itertools::Itertools;
16use vortex_utils::aliases::hash_set::HashSet;
17
18use crate::DType;
19use crate::FieldName;
20
21/// Selects a nested type within either a struct or a list.
22#[derive(Clone, Debug, PartialEq, Eq, Hash)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub enum Field {
25    /// Address a field of a [`crate::DType::Struct`].
26    Name(FieldName),
27    // TODO(connor)[FixedSizeList]: Actually make use of this variant after `FixedSizeList` is
28    // implemented.
29    /// Address the element type of a [`crate::DType::List`] or [`crate::DType::FixedSizeList`].
30    ElementType,
31}
32
33impl Field {
34    /// Retrieve a field name if it has one
35    pub fn as_name(&self) -> Option<&str> {
36        match self {
37            Field::Name(name) => Some(name.as_ref()),
38            Field::ElementType => None,
39        }
40    }
41
42    /// Returns true if the field is defined by Name
43    pub fn is_named(&self) -> bool {
44        matches!(self, Field::Name(_))
45    }
46}
47
48impl From<&str> for Field {
49    fn from(value: &str) -> Self {
50        Field::Name(value.into())
51    }
52}
53
54impl From<Arc<str>> for Field {
55    fn from(value: Arc<str>) -> Self {
56        Self::Name(FieldName::from(value))
57    }
58}
59
60impl From<FieldName> for Field {
61    fn from(value: FieldName) -> Self {
62        Self::Name(value)
63    }
64}
65
66impl Display for Field {
67    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
68        match self {
69            Field::Name(name) => write!(f, "${name}"),
70            Field::ElementType => write!(f, "[]"),
71        }
72    }
73}
74
75/// A sequence of field selectors representing a path through zero or more layers of `DType`.
76///
77/// # Examples
78///
79/// The empty path references the root:
80///
81/// ```
82/// use vortex_dtype::*;
83///
84/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
85/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
86/// ```
87///
88// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
89//  with a positional index to allow cheap step_into.
90#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
91#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
92pub struct FieldPath(Vec<Field>);
93
94impl FieldPath {
95    /// The selector for the root (i.e., the top-level struct itself)
96    pub fn root() -> Self {
97        Self::default()
98    }
99
100    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
101    pub fn from_name<F: Into<FieldName>>(name: F) -> Self {
102        Self(vec![Field::Name(name.into())])
103    }
104
105    /// Returns the sequence of field selectors that make up this path
106    pub fn parts(&self) -> &[Field] {
107        &self.0
108    }
109
110    /// Returns whether this path is a root path.
111    pub fn is_root(&self) -> bool {
112        self.0.is_empty()
113    }
114
115    /// Pushes a new field selector to the end of this path
116    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
117        self.0.push(field.into());
118        self
119    }
120
121    /// Whether the path starts with the given field name
122    /// TODO(joe): handle asserts better.
123    pub fn starts_with_field(&self, field: &Field) -> bool {
124        assert!(matches!(field, Field::Name(_)));
125        let first = self.0.first();
126        assert!(matches!(first, Some(Field::Name(_))));
127        first.is_some_and(|f| f == field)
128    }
129
130    /// Steps into the next field in the path
131    pub fn step_into(mut self) -> Option<Self> {
132        if self.0.is_empty() {
133            return None;
134        }
135        self.0 = self.0.into_iter().skip(1).collect();
136        Some(self)
137    }
138
139    /// The dtype, within the given type, to which this field path refers.
140    ///
141    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
142    /// literal nullability of the child.
143    ///
144    /// # Examples
145    ///
146    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
147    ///
148    /// ```
149    /// use std::sync::Arc;
150    ///
151    /// use vortex_dtype::*;
152    /// use vortex_dtype::Nullability::*;
153    ///
154    /// let dtype = DType::Struct(
155    ///     StructFields::from_iter([(
156    ///         "a",
157    ///         DType::List(
158    ///             Arc::new(DType::Struct(
159    ///                 StructFields::from_iter([(
160    ///                     "b",
161    ///                     DType::Primitive(PType::U32, NonNullable),
162    ///                 )]),
163    ///                 NonNullable,
164    ///             )),
165    ///             Nullable,
166    ///         ),
167    ///     )]),
168    ///     NonNullable,
169    /// );
170    ///
171    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
172    /// let resolved = path.resolve(dtype).unwrap();
173    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
174    /// ```
175    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
176        for field in &self.0 {
177            dtype = match (dtype, field) {
178                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
179                (DType::List(element_dtype, _), Field::ElementType) => {
180                    element_dtype.as_ref().clone()
181                }
182                (..) => return None,
183            }
184        }
185
186        Some(dtype)
187    }
188
189    /// Does the field referenced by the field path exist in the given dtype?
190    pub fn exists_in(&self, dtype: DType) -> bool {
191        // Indexing a struct type always allocates anyway.
192        self.resolve(dtype).is_some()
193    }
194}
195
196impl FromIterator<Field> for FieldPath {
197    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
198        FieldPath(iter.into_iter().collect())
199    }
200}
201
202impl From<Field> for FieldPath {
203    fn from(value: Field) -> Self {
204        FieldPath(vec![value])
205    }
206}
207
208impl From<Vec<Field>> for FieldPath {
209    fn from(value: Vec<Field>) -> Self {
210        FieldPath(value)
211    }
212}
213
214impl Display for FieldPath {
215    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
216        Display::fmt(&self.0.iter().format("."), f)
217    }
218}
219
220#[derive(Default, Clone, Debug)]
221/// Contains a set of field paths, and can answer an efficient field path contains queries.
222pub struct FieldPathSet {
223    /// While this is currently a set wrapper it can be replaced with a trie.
224    // TODO(joe): this can be replaced with a `FieldPath` trie
225    set: HashSet<FieldPath>,
226}
227
228impl FieldPathSet {
229    /// Checks if a set contains a field path
230    pub fn contains(&self, path: &FieldPath) -> bool {
231        self.set.contains(path)
232    }
233}
234
235impl FromIterator<FieldPath> for FieldPathSet {
236    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
237        let set = HashSet::from_iter(iter);
238        Self { set }
239    }
240}
241
242#[cfg(test)]
243mod tests {
244    use super::*;
245    use crate::DType;
246    use crate::Nullability::*;
247    use crate::PType;
248    use crate::StructFields;
249
250    #[test]
251    fn test_field_path() {
252        let path = FieldPath::from_name("A").push("B").push("C");
253        assert_eq!(path.to_string(), "$A.$B.$C");
254
255        let fields = vec!["A", "B", "C"]
256            .into_iter()
257            .map(Field::from)
258            .collect_vec();
259        assert_eq!(path.parts(), &fields);
260
261        let vec_path = FieldPath::from(fields);
262        assert_eq!(vec_path.to_string(), "$A.$B.$C");
263        assert_eq!(path, vec_path);
264    }
265
266    #[test]
267    fn nested_field_single_level() {
268        let a_type = DType::Primitive(PType::I32, NonNullable);
269        let dtype = DType::struct_(
270            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
271            NonNullable,
272        );
273        let path = FieldPath::from_name("a");
274        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
275        assert!(path.exists_in(dtype));
276    }
277
278    #[test]
279    fn nested_field_two_level() {
280        let inner = DType::struct_(
281            [
282                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
283                ("inner_b", DType::Bool(Nullable)),
284            ],
285            NonNullable,
286        );
287
288        let outer = DType::Struct(
289            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
290            NonNullable,
291        );
292
293        let path = FieldPath::from_name("outer_b").push("inner_a");
294        let dtype = path.resolve(outer.clone()).unwrap();
295
296        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
297        assert!(path.exists_in(outer));
298    }
299
300    #[test]
301    fn nested_field_deep_nested() {
302        let level1 = DType::struct_(
303            [(
304                "a",
305                DType::struct_(
306                    [(
307                        "b",
308                        DType::list(
309                            DType::struct_(
310                                [("c", DType::Primitive(PType::F64, Nullable))],
311                                NonNullable,
312                            ),
313                            Nullable,
314                        ),
315                    )],
316                    NonNullable,
317                ),
318            )],
319            NonNullable,
320        );
321
322        let path = FieldPath::from_name("a")
323            .push("b")
324            .push(Field::ElementType)
325            .push("c");
326        let dtype = path.resolve(level1.clone()).unwrap();
327
328        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
329        assert!(path.exists_in(level1.clone()));
330
331        let path = FieldPath::from_name("a")
332            .push("b")
333            .push("c")
334            .push(Field::ElementType);
335        assert!(path.resolve(level1.clone()).is_none());
336        assert!(!path.exists_in(level1.clone()));
337
338        let path = FieldPath::from_name("a")
339            .push(Field::ElementType)
340            .push("b")
341            .push("c");
342        assert!(path.resolve(level1.clone()).is_none());
343        assert!(!path.exists_in(level1.clone()));
344
345        let path = FieldPath::root().push("a").push("b").push("c");
346        assert!(path.resolve(level1.clone()).is_none());
347        assert!(!path.exists_in(level1));
348    }
349
350    #[test]
351    fn nested_field_not_found() {
352        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
353        let path = FieldPath::from_name("b");
354        assert!(path.resolve(dtype.clone()).is_none());
355        assert!(!path.exists_in(dtype.clone()));
356
357        let path = FieldPath::from(Field::ElementType);
358        assert!(path.resolve(dtype.clone()).is_none());
359        assert!(!path.exists_in(dtype));
360    }
361
362    #[test]
363    fn nested_field_non_struct_intermediate() {
364        let dtype = DType::struct_(
365            [("a", DType::Primitive(PType::I32, NonNullable))],
366            NonNullable,
367        );
368        let path = FieldPath::from_name("a").push("b");
369        assert!(path.resolve(dtype.clone()).is_none());
370        assert!(!path.exists_in(dtype.clone()));
371
372        let path = FieldPath::from_name("a").push(Field::ElementType);
373        assert!(path.resolve(dtype.clone()).is_none());
374        assert!(!path.exists_in(dtype));
375    }
376}