Skip to main content

vortex_dtype/
field.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Selectors for fields or elements in (possibly nested) `DType`s
5//!
6//! A `Field` indexes a single layer of `DType`, for example: a name in a struct or the element of a
7//! list. A `FieldPath` indexes zero or more layers, for example: the field "child" which is within
8//! the struct field "parent" which is within the struct field "grandparent".
9
10use core::fmt;
11use std::fmt::Display;
12use std::fmt::Formatter;
13use std::sync::Arc;
14
15use itertools::Itertools;
16use vortex_utils::aliases::hash_set::HashSet;
17
18use crate::DType;
19use crate::FieldName;
20
21/// Selects a nested type within either a struct or a list.
22#[derive(Clone, Debug, PartialEq, Eq, Hash)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub enum Field {
25    /// Address a field of a [`crate::DType::Struct`].
26    Name(FieldName),
27    // TODO(connor)[FixedSizeList]: Actually make use of this variant after `FixedSizeList` is
28    // implemented.
29    /// Address the element type of a [`crate::DType::List`] or [`crate::DType::FixedSizeList`].
30    ElementType,
31}
32
33impl Field {
34    /// Retrieve a field name if it has one
35    pub fn as_name(&self) -> Option<&str> {
36        match self {
37            Field::Name(name) => Some(name.as_ref()),
38            Field::ElementType => None,
39        }
40    }
41
42    /// Returns true if the field is defined by Name
43    pub fn is_named(&self) -> bool {
44        matches!(self, Field::Name(_))
45    }
46}
47
48impl From<&str> for Field {
49    fn from(value: &str) -> Self {
50        Field::Name(value.into())
51    }
52}
53
54impl From<Arc<str>> for Field {
55    fn from(value: Arc<str>) -> Self {
56        Self::Name(FieldName::from(value))
57    }
58}
59
60impl From<FieldName> for Field {
61    fn from(value: FieldName) -> Self {
62        Self::Name(value)
63    }
64}
65
66impl Display for Field {
67    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
68        match self {
69            Field::Name(name) => write!(f, "${name}"),
70            Field::ElementType => write!(f, "[]"),
71        }
72    }
73}
74
75/// A sequence of field selectors representing a path through zero or more layers of `DType`.
76///
77/// # Examples
78///
79/// The empty path references the root:
80///
81/// ```
82/// use vortex_dtype::*;
83///
84/// let dtype_i32 = DType::Primitive(PType::I32, Nullability::NonNullable);
85/// assert_eq!(dtype_i32, FieldPath::root().resolve(dtype_i32.clone()).unwrap());
86/// ```
87///
88// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
89//  with a positional index to allow cheap step_into.
90#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
91#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
92pub struct FieldPath(Vec<Field>);
93
94/// A helpful constructor for creating `FieldPath`s to nested
95/// struct fields of the format `field_path!(x.y.z)`
96#[macro_export]
97macro_rules! field_path {
98    ($front:ident) => {{
99        $crate::FieldPath::from_name(stringify!($front))
100    }};
101    ($front:ident $(. $rest:ident)+) => {{
102        $crate::FieldPath::from_iter([
103            $crate::Field::from(stringify!($front)),
104            $($crate::Field::from(stringify!($rest))),+
105        ])
106    }};
107}
108
109impl FieldPath {
110    /// The selector for the root (i.e., the top-level struct itself)
111    pub fn root() -> Self {
112        Self::default()
113    }
114
115    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of the top-level struct)
116    pub fn from_name<F: Into<FieldName>>(name: F) -> Self {
117        Self(vec![Field::Name(name.into())])
118    }
119
120    /// Returns the sequence of field selectors that make up this path
121    pub fn parts(&self) -> &[Field] {
122        &self.0
123    }
124
125    /// Returns whether this path is a root path.
126    pub fn is_root(&self) -> bool {
127        self.0.is_empty()
128    }
129
130    /// Pushes a new field selector to the end of this path
131    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
132        self.0.push(field.into());
133        self
134    }
135
136    /// Whether the path starts with the given field name
137    /// TODO(joe): handle asserts better.
138    pub fn starts_with_field(&self, field: &Field) -> bool {
139        assert!(matches!(field, Field::Name(_)));
140        let first = self.0.first();
141        assert!(matches!(first, Some(Field::Name(_))));
142        first.is_some_and(|f| f == field)
143    }
144
145    /// Steps into the next field in the path
146    pub fn step_into(mut self) -> Option<Self> {
147        if self.0.is_empty() {
148            return None;
149        }
150        self.0 = self.0.into_iter().skip(1).collect();
151        Some(self)
152    }
153
154    /// The dtype, within the given type, to which this field path refers.
155    ///
156    /// Note that a nullable DType may contain a non-nullable DType. This function returns the
157    /// literal nullability of the child.
158    ///
159    /// # Examples
160    ///
161    /// Extract the type of the "b" field from `struct{a: list(struct{b: u32})?}`:
162    ///
163    /// ```
164    /// use std::sync::Arc;
165    ///
166    /// use vortex_dtype::*;
167    /// use vortex_dtype::Nullability::*;
168    ///
169    /// let dtype = DType::Struct(
170    ///     StructFields::from_iter([(
171    ///         "a",
172    ///         DType::List(
173    ///             Arc::new(DType::Struct(
174    ///                 StructFields::from_iter([(
175    ///                     "b",
176    ///                     DType::Primitive(PType::U32, NonNullable),
177    ///                 )]),
178    ///                 NonNullable,
179    ///             )),
180    ///             Nullable,
181    ///         ),
182    ///     )]),
183    ///     NonNullable,
184    /// );
185    ///
186    /// let path = FieldPath::from(vec![Field::from("a"), Field::ElementType, Field::from("b")]);
187    /// let resolved = path.resolve(dtype).unwrap();
188    /// assert_eq!(resolved, DType::Primitive(PType::U32, NonNullable));
189    /// ```
190    pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
191        for field in &self.0 {
192            dtype = match (dtype, field) {
193                (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
194                (DType::List(element_dtype, _), Field::ElementType) => {
195                    element_dtype.as_ref().clone()
196                }
197                (..) => return None,
198            }
199        }
200
201        Some(dtype)
202    }
203
204    /// Does the field referenced by the field path exist in the given dtype?
205    pub fn exists_in(&self, dtype: DType) -> bool {
206        // Indexing a struct type always allocates anyway.
207        self.resolve(dtype).is_some()
208    }
209
210    /// Returns true if this path overlaps with another path (i.e., one is a prefix of the other).
211    pub fn overlap(&self, other: &FieldPath) -> bool {
212        let min_len = self.0.len().min(other.0.len());
213        self.0.iter().take(min_len).eq(other.0.iter().take(min_len))
214    }
215}
216
217impl FromIterator<Field> for FieldPath {
218    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
219        FieldPath(iter.into_iter().collect())
220    }
221}
222
223impl From<Field> for FieldPath {
224    fn from(value: Field) -> Self {
225        FieldPath(vec![value])
226    }
227}
228
229impl From<Vec<Field>> for FieldPath {
230    fn from(value: Vec<Field>) -> Self {
231        FieldPath(value)
232    }
233}
234
235impl Display for FieldPath {
236    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
237        Display::fmt(&self.0.iter().format("."), f)
238    }
239}
240
241#[derive(Default, Clone, Debug)]
242/// Contains a set of field paths, and can answer an efficient field path contains queries.
243pub struct FieldPathSet {
244    /// While this is currently a set wrapper it can be replaced with a trie.
245    // TODO(joe): this can be replaced with a `FieldPath` trie
246    set: HashSet<FieldPath>,
247}
248
249impl FieldPathSet {
250    /// Checks if a set contains a field path
251    pub fn contains(&self, path: &FieldPath) -> bool {
252        self.set.contains(path)
253    }
254}
255
256impl FromIterator<FieldPath> for FieldPathSet {
257    fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
258        let set = HashSet::from_iter(iter);
259        Self { set }
260    }
261}
262
263#[cfg(test)]
264mod tests {
265    use super::*;
266    use crate::DType;
267    use crate::Nullability::*;
268    use crate::PType;
269    use crate::StructFields;
270
271    #[test]
272    fn test_field_path() {
273        let path = FieldPath::from_name("A").push("B").push("C");
274        assert_eq!(path.to_string(), "$A.$B.$C");
275
276        let fields = vec!["A", "B", "C"]
277            .into_iter()
278            .map(Field::from)
279            .collect_vec();
280        assert_eq!(path.parts(), &fields);
281
282        let vec_path = FieldPath::from(fields);
283        assert_eq!(vec_path.to_string(), "$A.$B.$C");
284        assert_eq!(path, vec_path);
285    }
286
287    #[test]
288    fn nested_field_single_level() {
289        let a_type = DType::Primitive(PType::I32, NonNullable);
290        let dtype = DType::struct_(
291            [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
292            NonNullable,
293        );
294        let path = FieldPath::from_name("a");
295        assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
296        assert!(path.exists_in(dtype));
297    }
298
299    #[test]
300    fn nested_field_two_level() {
301        let inner = DType::struct_(
302            [
303                ("inner_a", DType::Primitive(PType::U8, NonNullable)),
304                ("inner_b", DType::Bool(Nullable)),
305            ],
306            NonNullable,
307        );
308
309        let outer = DType::Struct(
310            StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
311            NonNullable,
312        );
313
314        let path = FieldPath::from_name("outer_b").push("inner_a");
315        let dtype = path.resolve(outer.clone()).unwrap();
316
317        assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
318        assert!(path.exists_in(outer));
319    }
320
321    #[test]
322    fn nested_field_deep_nested() {
323        let level1 = DType::struct_(
324            [(
325                "a",
326                DType::struct_(
327                    [(
328                        "b",
329                        DType::list(
330                            DType::struct_(
331                                [("c", DType::Primitive(PType::F64, Nullable))],
332                                NonNullable,
333                            ),
334                            Nullable,
335                        ),
336                    )],
337                    NonNullable,
338                ),
339            )],
340            NonNullable,
341        );
342
343        let path = FieldPath::from_name("a")
344            .push("b")
345            .push(Field::ElementType)
346            .push("c");
347        let dtype = path.resolve(level1.clone()).unwrap();
348
349        assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
350        assert!(path.exists_in(level1.clone()));
351
352        let path = FieldPath::from_name("a")
353            .push("b")
354            .push("c")
355            .push(Field::ElementType);
356        assert!(path.resolve(level1.clone()).is_none());
357        assert!(!path.exists_in(level1.clone()));
358
359        let path = FieldPath::from_name("a")
360            .push(Field::ElementType)
361            .push("b")
362            .push("c");
363        assert!(path.resolve(level1.clone()).is_none());
364        assert!(!path.exists_in(level1.clone()));
365
366        let path = FieldPath::root().push("a").push("b").push("c");
367        assert!(path.resolve(level1.clone()).is_none());
368        assert!(!path.exists_in(level1));
369    }
370
371    #[test]
372    fn nested_field_not_found() {
373        let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
374        let path = field_path!(b);
375        assert!(path.resolve(dtype.clone()).is_none());
376        assert!(!path.exists_in(dtype.clone()));
377
378        let path = FieldPath::from(Field::ElementType);
379        assert!(path.resolve(dtype.clone()).is_none());
380        assert!(!path.exists_in(dtype));
381    }
382
383    #[test]
384    fn nested_field_non_struct_intermediate() {
385        let dtype = DType::struct_(
386            [("a", DType::Primitive(PType::I32, NonNullable))],
387            NonNullable,
388        );
389        let path = field_path!(a.b);
390        assert!(path.resolve(dtype.clone()).is_none());
391        assert!(!path.exists_in(dtype.clone()));
392
393        let path = FieldPath::from_name("a").push(Field::ElementType);
394        assert!(path.resolve(dtype.clone()).is_none());
395        assert!(!path.exists_in(dtype));
396    }
397
398    #[test]
399    fn test_overlap_positive() {
400        let path1 = field_path!(a.b.c);
401        let path2 = field_path!(a.b);
402        assert!(path1.overlap(&path2));
403        assert!(path2.overlap(&path1));
404
405        let path3 = field_path!(a);
406        assert!(path1.overlap(&path3));
407        assert!(path3.overlap(&path1));
408    }
409
410    #[test]
411    fn test_overlap_negative() {
412        let path1 = field_path!(a.b.c);
413        let path2 = field_path!(a.x.y);
414        assert!(!path1.overlap(&path2));
415        assert!(!path2.overlap(&path1));
416
417        let path3 = field_path!(x);
418        assert!(!path1.overlap(&path3));
419        assert!(!path3.overlap(&path1));
420    }
421}