vortex_dtype/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Selectors for fields or elements in (possibly nested) `DType`s
5//!
6//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
7//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
8//! the struct field "parent" which is within the struct field "grandparent".
9
10use core::fmt;
11use std::fmt::{Display, Formatter};
12use std::sync::Arc;
13
14use itertools::Itertools;
15use vortex_utils::aliases::hash_set::HashSet;
16
17use crate::{DType, FieldName};
18
19/// Selects a nested type within either a struct or a list.
20#[derive(Clone, Debug, PartialEq, Eq, Hash)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22pub enum Field {
23    /// Address a field of a [`crate::DType::Struct`].
24    Name(FieldName),
25    // TODO(connor)[FixedSizeList]: Actually make use of this variant after `FixedSizeList` is
26    // implemented.
27    /// Address the element type of a [`crate::DType::List`] or [`crate::DType::FixedSizeList`].
28    ElementType,
29}
30
31impl Field {
32    /// Retrieve a field name if it has one
33    pub fn as_name(&self) -> Option<&str> {
34        match self {
35            Field::Name(name) => Some(name.as_ref()),
36            Field::ElementType => None,
37        }
38    }
39
40    /// Returns true if the field is defined by Name
41    pub fn is_named(&self) -> bool {
42        matches!(self, Field::Name(_))
43    }
44}
45
46impl From<&str> for Field {
47    fn from(value: &str) -> Self {
48        Field::Name(value.into())
49    }
50}
51
52impl From<Arc<str>> for Field {
53    fn from(value: Arc<str>) -> Self {
54        Self::Name(FieldName::from(value))
55    }
56}
57
58impl From<FieldName> for Field {
59    fn from(value: FieldName) -> Self {
60        Self::Name(value)
61    }
62}
63
64impl Display for Field {
65    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
66        match self {
67            Field::Name(name) => write!(f, "${name}"),
68            Field::ElementType => write!(f, "[]"),
69        }
70    }
71}
72
73/// A sequence of field selectors representing a path through zero or more layers of `DType`.
74///
75/// # Examples
76///
77/// The empty path references the root:
78///
79/// ```
80/// use vortex_dtype::*;
81///
82/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
83/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
84/// ```
85///
86// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
87//  with a positional index to allow cheap step_into.
88#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
89#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
90pub struct FieldPath(Vec<Field>);
91
92impl FieldPath {
93    /// The selector for the root (i.e., the top-level struct itself)
94    pub fn root() -> Self {
95        Self::default()
96    }
97
98    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
99    pub fn from_name<F: Into<FieldName>>(name: F) -> Self {
100        Self(vec![Field::Name(name.into())])
101    }
102
103    /// Returns the sequence of field selectors that make up this path
104    pub fn parts(&self) -> &[Field] {
105        &self.0
106    }
107
108    /// Returns whether this path is a root path.
109    pub fn is_root(&self) -> bool {
110        self.0.is_empty()
111    }
112
113    /// Pushes a new field selector to the end of this path
114    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
115        self.0.push(field.into());
116        self
117    }
118
119    /// Whether the path starts with the given field name
120    /// TODO(joe): handle asserts better.
121    pub fn starts_with_field(&self, field: &Field) -> bool {
122        assert!(matches!(field, Field::Name(_)));
123        let first = self.0.first();
124        assert!(matches!(first, Some(Field::Name(_))));
125        first.is_some_and(|f| f == field)
126    }
127
128    /// Steps into the next field in the path
129    pub fn step_into(mut self) -> Option<Self> {
130        if self.0.is_empty() {
131            return None;
132        }
133        self.0 = self.0.into_iter().skip(1).collect();
134        Some(self)
135    }
136
137    /// The dtype, within the given type, to which this field path refers.
138    ///
139    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
140    /// literal nullability of the child.
141    ///
142    /// # Examples
143    ///
144    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
145    ///
146    /// ```
147    /// use std::sync::Arc;
148    ///
149    /// use vortex_dtype::*;
150    /// use vortex_dtype::Nullability::*;
151    ///
152    /// let dtype = DType::Struct(
153    ///     StructFields::from_iter([(
154    ///         "a",
155    ///         DType::List(
156    ///             Arc::new(DType::Struct(
157    ///                 StructFields::from_iter([(
158    ///                     "b",
159    ///                     DType::Primitive(PType::U32, NonNullable),
160    ///                 )]),
161    ///                 NonNullable,
162    ///             )),
163    ///             Nullable,
164    ///         ),
165    ///     )]),
166    ///     NonNullable,
167    /// );
168    ///
169    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
170    /// let resolved = path.resolve(dtype).unwrap();
171    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
172    /// ```
173    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
174        for field in &self.0 {
175            dtype = match (dtype, field) {
176                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
177                (DType::List(element_dtype, _), Field::ElementType) => {
178                    element_dtype.as_ref().clone()
179                }
180                (..) => return None,
181            }
182        }
183
184        Some(dtype)
185    }
186
187    /// Does the field referenced by the field path exist in the given dtype?
188    pub fn exists_in(&self, dtype: DType) -> bool {
189        // Indexing a struct type always allocates anyway.
190        self.resolve(dtype).is_some()
191    }
192}
193
194impl FromIterator<Field> for FieldPath {
195    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
196        FieldPath(iter.into_iter().collect())
197    }
198}
199
200impl From<Field> for FieldPath {
201    fn from(value: Field) -> Self {
202        FieldPath(vec![value])
203    }
204}
205
206impl From<Vec<Field>> for FieldPath {
207    fn from(value: Vec<Field>) -> Self {
208        FieldPath(value)
209    }
210}
211
212impl Display for FieldPath {
213    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
214        Display::fmt(&self.0.iter().format("."), f)
215    }
216}
217
218#[derive(Default, Clone, Debug)]
219/// Contains a set of field paths, and can answer an efficient field path contains queries.
220pub struct FieldPathSet {
221    /// While this is currently a set wrapper it can be replaced with a trie.
222    // TODO(joe): this can be replaced with a `FieldPath` trie
223    set: HashSet<FieldPath>,
224}
225
226impl FieldPathSet {
227    /// Checks if a set contains a field path
228    pub fn contains(&self, path: &FieldPath) -> bool {
229        self.set.contains(path)
230    }
231}
232
233impl FromIterator<FieldPath> for FieldPathSet {
234    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
235        let set = HashSet::from_iter(iter);
236        Self { set }
237    }
238}
239
240#[cfg(test)]
241mod tests {
242    use super::*;
243    use crate::Nullability::*;
244    use crate::{DType, PType, StructFields};
245
246    #[test]
247    fn test_field_path() {
248        let path = FieldPath::from_name("A").push("B").push("C");
249        assert_eq!(path.to_string(), "$A.$B.$C");
250
251        let fields = vec!["A", "B", "C"]
252            .into_iter()
253            .map(Field::from)
254            .collect_vec();
255        assert_eq!(path.parts(), &fields);
256
257        let vec_path = FieldPath::from(fields);
258        assert_eq!(vec_path.to_string(), "$A.$B.$C");
259        assert_eq!(path, vec_path);
260    }
261
262    #[test]
263    fn nested_field_single_level() {
264        let a_type = DType::Primitive(PType::I32, NonNullable);
265        let dtype = DType::struct_(
266            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
267            NonNullable,
268        );
269        let path = FieldPath::from_name("a");
270        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
271        assert!(path.exists_in(dtype));
272    }
273
274    #[test]
275    fn nested_field_two_level() {
276        let inner = DType::struct_(
277            [
278                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
279                ("inner_b", DType::Bool(Nullable)),
280            ],
281            NonNullable,
282        );
283
284        let outer = DType::Struct(
285            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
286            NonNullable,
287        );
288
289        let path = FieldPath::from_name("outer_b").push("inner_a");
290        let dtype = path.resolve(outer.clone()).unwrap();
291
292        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
293        assert!(path.exists_in(outer));
294    }
295
296    #[test]
297    fn nested_field_deep_nested() {
298        let level1 = DType::struct_(
299            [(
300                "a",
301                DType::struct_(
302                    [(
303                        "b",
304                        DType::list(
305                            DType::struct_(
306                                [("c", DType::Primitive(PType::F64, Nullable))],
307                                NonNullable,
308                            ),
309                            Nullable,
310                        ),
311                    )],
312                    NonNullable,
313                ),
314            )],
315            NonNullable,
316        );
317
318        let path = FieldPath::from_name("a")
319            .push("b")
320            .push(Field::ElementType)
321            .push("c");
322        let dtype = path.resolve(level1.clone()).unwrap();
323
324        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
325        assert!(path.exists_in(level1.clone()));
326
327        let path = FieldPath::from_name("a")
328            .push("b")
329            .push("c")
330            .push(Field::ElementType);
331        assert!(path.resolve(level1.clone()).is_none());
332        assert!(!path.exists_in(level1.clone()));
333
334        let path = FieldPath::from_name("a")
335            .push(Field::ElementType)
336            .push("b")
337            .push("c");
338        assert!(path.resolve(level1.clone()).is_none());
339        assert!(!path.exists_in(level1.clone()));
340
341        let path = FieldPath::root().push("a").push("b").push("c");
342        assert!(path.resolve(level1.clone()).is_none());
343        assert!(!path.exists_in(level1));
344    }
345
346    #[test]
347    fn nested_field_not_found() {
348        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
349        let path = FieldPath::from_name("b");
350        assert!(path.resolve(dtype.clone()).is_none());
351        assert!(!path.exists_in(dtype.clone()));
352
353        let path = FieldPath::from(Field::ElementType);
354        assert!(path.resolve(dtype.clone()).is_none());
355        assert!(!path.exists_in(dtype));
356    }
357
358    #[test]
359    fn nested_field_non_struct_intermediate() {
360        let dtype = DType::struct_(
361            [("a", DType::Primitive(PType::I32, NonNullable))],
362            NonNullable,
363        );
364        let path = FieldPath::from_name("a").push("b");
365        assert!(path.resolve(dtype.clone()).is_none());
366        assert!(!path.exists_in(dtype.clone()));
367
368        let path = FieldPath::from_name("a").push(Field::ElementType);
369        assert!(path.resolve(dtype.clone()).is_none());
370        assert!(!path.exists_in(dtype));
371    }
372}