vortex_dtype/
field.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
//! Selectors for fields in (possibly nested) `StructDType`s.
//!
//! A [`Field`] selects a direct child field of a struct, either by name or by index (position).
//! A [`FieldPath`] selects a (possibly nested) field via a sequence of such selectors.

use core::fmt;
use std::fmt::{Display, Formatter};
use std::sync::Arc;

use itertools::Itertools;
use vortex_error::{vortex_err, VortexResult};

use crate::FieldNames;

/// A selector for a field in a struct
///
/// A field is addressed either by its name or by its position. Equality and hashing are
/// derived structurally, so a `Name` selector never compares equal to an `Index` selector,
/// even when both would resolve to the same field of a given struct.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Field {
    /// A field selector by name
    Name(Arc<str>),
    /// A field selector by index (position)
    Index(usize),
}

impl From<&str> for Field {
    fn from(value: &str) -> Self {
        Field::Name(value.into())
    }
}

impl From<String> for Field {
    fn from(value: String) -> Self {
        Field::Name(value.into())
    }
}

impl From<usize> for Field {
    fn from(value: usize) -> Self {
        Field::Index(value)
    }
}

impl Display for Field {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Field::Name(name) => write!(f, "${name}"),
            Field::Index(idx) => write!(f, "[{idx}]"),
        }
    }
}

impl Field {
    /// Whether this selector addresses the field by name.
    pub fn is_named(&self) -> bool {
        match self {
            Field::Name(_) => true,
            Field::Index(_) => false,
        }
    }

    /// Whether this selector addresses the field by index (position).
    pub fn is_indexed(&self) -> bool {
        match self {
            Field::Index(_) => true,
            Field::Name(_) => false,
        }
    }

    /// Converts this selector into a by-name selector.
    ///
    /// A `Field::Name` is returned unchanged (without checking it against `field_names`).
    /// A `Field::Index` is replaced by the name stored at that position in `field_names`.
    ///
    /// # Errors
    ///
    /// Returns an error if the index is out of bounds for `field_names`.
    pub fn into_named_field(self, field_names: &FieldNames) -> VortexResult<Self> {
        match self {
            Field::Name(_) => Ok(self),
            Field::Index(idx) => match field_names.get(idx) {
                Some(name) => Ok(Field::Name(name.clone())),
                None => Err(vortex_err!(
                    "Field index {} out of bounds, it has names {:?}",
                    idx,
                    field_names
                )),
            },
        }
    }

    /// Converts this selector into a by-index selector.
    ///
    /// A `Field::Index` is returned unchanged (without checking it against `field_names`).
    /// A `Field::Name` is replaced by the position of the first entry in `field_names` equal
    /// to that name.
    ///
    /// # Errors
    ///
    /// Returns an error if the name does not occur in `field_names`.
    pub fn into_index_field(self, field_names: &FieldNames) -> VortexResult<Self> {
        match self {
            Field::Index(_) => Ok(self),
            Field::Name(name) => {
                let position = field_names.iter().position(|candidate| *candidate == name);
                match position {
                    Some(idx) => Ok(Field::Index(idx)),
                    None => Err(vortex_err!(
                        "Field name {} not found, it has names {:?}",
                        name,
                        field_names
                    )),
                }
            }
        }
    }
}

/// A path through a (possibly nested) struct, composed of a sequence of field selectors
///
/// An empty sequence denotes the root, i.e. the top-level struct itself. `push` appends a
/// selector to the end of the sequence and `step_into` drops the first one.
// TODO(ngates): wrap `Vec<Field>` in Option for cheaper "root" path.
// TODO(ngates): we should probably reverse the path. Or better yet, store a Arc<[Field]> along
//  with a positional index to allow cheap step_into.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FieldPath(Vec<Field>);

impl FieldPath {
    /// The selector for the root (i.e., the top-level struct itself)
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Constructs a new `FieldPath` from a single field selector (i.e., a direct child field of
    /// the top-level struct)
    pub fn from_name<F: Into<Field>>(name: F) -> Self {
        Self(vec![name.into()])
    }

    /// Returns the sequence of field selectors that make up this path
    pub fn path(&self) -> &[Field] {
        &self.0
    }

    /// Returns whether this path is a root path, i.e. contains no selectors.
    pub fn is_root(&self) -> bool {
        self.0.is_empty()
    }

    /// Pushes a new field selector to the end of this path and returns the extended path
    pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
        self.0.push(field.into());
        self
    }

    /// Whether the path starts with the given field name
    ///
    /// A root (empty) path does not start with any field, so `false` is returned for it.
    ///
    /// # Panics
    ///
    /// Panics if `field` is not a `Field::Name`, or if the first selector of a non-empty path
    /// is not a `Field::Name`: a name and an index cannot be compared without knowing the
    /// struct's field names.
    ///
    /// TODO(joe): handle asserts better.
    pub fn starts_with_field(&self, field: &Field) -> bool {
        assert!(
            matches!(field, Field::Name(_)),
            "starts_with_field requires a named field"
        );
        // An empty path is a valid input: it simply has no first selector to match.
        let Some(first) = self.0.first() else {
            return false;
        };
        assert!(
            matches!(first, Field::Name(_)),
            "starts_with_field requires the path to start with a named field"
        );
        first == field
    }

    /// Steps into the next field in the path
    ///
    /// Returns `None` for a root path; otherwise returns the path with its first selector
    /// removed (which may itself be the root path).
    pub fn step_into(mut self) -> Option<Self> {
        if self.0.is_empty() {
            return None;
        }
        // Drop the first selector in place rather than cloning the remaining ones into a
        // fresh vector.
        self.0.remove(0);
        Some(self)
    }
}

impl FromIterator<Field> for FieldPath {
    fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
        FieldPath(iter.into_iter().collect())
    }
}

impl From<Field> for FieldPath {
    fn from(value: Field) -> Self {
        FieldPath(vec![value])
    }
}

impl From<Vec<Field>> for FieldPath {
    fn from(value: Vec<Field>) -> Self {
        FieldPath(value)
    }
}

/// Renders the selectors in order, separated by `.`; a root path renders as the empty string.
impl Display for FieldPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let selectors = self.0.iter().format(".");
        Display::fmt(&selectors, f)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A path built incrementally and a path converted from a `Vec<Field>` must agree on
    /// their selectors, their rendering, and equality.
    #[test]
    fn test_field_path() {
        let built = FieldPath::from_name("A").push("B").push("C");
        assert_eq!(built.to_string(), "$A.$B.$C");

        let selectors = vec!["A", "B", "C"]
            .into_iter()
            .map(Field::from)
            .collect_vec();
        assert_eq!(built.path(), &selectors);

        let converted = FieldPath::from(selectors);
        assert_eq!(converted.to_string(), "$A.$B.$C");
        assert_eq!(built, converted);
    }
}