1use core::fmt;
11use std::fmt::Display;
12use std::fmt::Formatter;
13use std::sync::Arc;
14
15use itertools::Itertools;
16use vortex_utils::aliases::hash_set::HashSet;
17
18use crate::DType;
19use crate::FieldName;
20
21#[derive(Clone, Debug, PartialEq, Eq, Hash)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub enum Field {
25 Name(FieldName),
27 ElementType,
31}
32
33impl Field {
34 pub fn as_name(&self) -> Option<&str> {
36 match self {
37 Field::Name(name) => Some(name.as_ref()),
38 Field::ElementType => None,
39 }
40 }
41
42 pub fn is_named(&self) -> bool {
44 matches!(self, Field::Name(_))
45 }
46}
47
48impl From<&str> for Field {
49 fn from(value: &str) -> Self {
50 Field::Name(value.into())
51 }
52}
53
54impl From<Arc<str>> for Field {
55 fn from(value: Arc<str>) -> Self {
56 Self::Name(FieldName::from(value))
57 }
58}
59
60impl From<FieldName> for Field {
61 fn from(value: FieldName) -> Self {
62 Self::Name(value)
63 }
64}
65
66impl Display for Field {
67 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
68 match self {
69 Field::Name(name) => write!(f, "${name}"),
70 Field::ElementType => write!(f, "[]"),
71 }
72 }
73}
74
75#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
91#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
92pub struct FieldPath(Vec<Field>);
93
94#[macro_export]
97macro_rules! field_path {
98 ($front:ident) => {{
99 $crate::FieldPath::from_name(stringify!($front))
100 }};
101 ($front:ident $(. $rest:ident)+) => {{
102 $crate::FieldPath::from_iter([
103 $crate::Field::from(stringify!($front)),
104 $($crate::Field::from(stringify!($rest))),+
105 ])
106 }};
107}
108
109impl FieldPath {
110 pub fn root() -> Self {
112 Self::default()
113 }
114
115 pub fn from_name<F: Into<FieldName>>(name: F) -> Self {
117 Self(vec![Field::Name(name.into())])
118 }
119
120 pub fn parts(&self) -> &[Field] {
122 &self.0
123 }
124
125 pub fn is_root(&self) -> bool {
127 self.0.is_empty()
128 }
129
130 pub fn push<F: Into<Field>>(mut self, field: F) -> Self {
132 self.0.push(field.into());
133 self
134 }
135
136 pub fn starts_with_field(&self, field: &Field) -> bool {
139 assert!(matches!(field, Field::Name(_)));
140 let first = self.0.first();
141 assert!(matches!(first, Some(Field::Name(_))));
142 first.is_some_and(|f| f == field)
143 }
144
145 pub fn step_into(mut self) -> Option<Self> {
147 if self.0.is_empty() {
148 return None;
149 }
150 self.0 = self.0.into_iter().skip(1).collect();
151 Some(self)
152 }
153
154 pub fn resolve(&self, mut dtype: DType) -> Option<DType> {
191 for field in &self.0 {
192 dtype = match (dtype, field) {
193 (DType::Struct(fields, _), Field::Name(name)) => fields.field(name)?,
194 (DType::List(element_dtype, _), Field::ElementType) => {
195 element_dtype.as_ref().clone()
196 }
197 (..) => return None,
198 }
199 }
200
201 Some(dtype)
202 }
203
204 pub fn exists_in(&self, dtype: DType) -> bool {
206 self.resolve(dtype).is_some()
208 }
209
210 pub fn overlap(&self, other: &FieldPath) -> bool {
212 let min_len = self.0.len().min(other.0.len());
213 self.0.iter().take(min_len).eq(other.0.iter().take(min_len))
214 }
215}
216
217impl FromIterator<Field> for FieldPath {
218 fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
219 FieldPath(iter.into_iter().collect())
220 }
221}
222
223impl From<Field> for FieldPath {
224 fn from(value: Field) -> Self {
225 FieldPath(vec![value])
226 }
227}
228
229impl From<Vec<Field>> for FieldPath {
230 fn from(value: Vec<Field>) -> Self {
231 FieldPath(value)
232 }
233}
234
235impl Display for FieldPath {
236 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
237 Display::fmt(&self.0.iter().format("."), f)
238 }
239}
240
241#[derive(Default, Clone, Debug)]
242pub struct FieldPathSet {
244 set: HashSet<FieldPath>,
247}
248
249impl FieldPathSet {
250 pub fn contains(&self, path: &FieldPath) -> bool {
252 self.set.contains(path)
253 }
254}
255
256impl FromIterator<FieldPath> for FieldPathSet {
257 fn from_iter<T: IntoIterator<Item = FieldPath>>(iter: T) -> Self {
258 let set = HashSet::from_iter(iter);
259 Self { set }
260 }
261}
262
263#[cfg(test)]
264mod tests {
265 use super::*;
266 use crate::DType;
267 use crate::Nullability::*;
268 use crate::PType;
269 use crate::StructFields;
270
271 #[test]
272 fn test_field_path() {
273 let path = FieldPath::from_name("A").push("B").push("C");
274 assert_eq!(path.to_string(), "$A.$B.$C");
275
276 let fields = vec!["A", "B", "C"]
277 .into_iter()
278 .map(Field::from)
279 .collect_vec();
280 assert_eq!(path.parts(), &fields);
281
282 let vec_path = FieldPath::from(fields);
283 assert_eq!(vec_path.to_string(), "$A.$B.$C");
284 assert_eq!(path, vec_path);
285 }
286
287 #[test]
288 fn nested_field_single_level() {
289 let a_type = DType::Primitive(PType::I32, NonNullable);
290 let dtype = DType::struct_(
291 [("a", a_type.clone()), ("b", DType::Bool(Nullable))],
292 NonNullable,
293 );
294 let path = FieldPath::from_name("a");
295 assert_eq!(a_type, path.resolve(dtype.clone()).unwrap());
296 assert!(path.exists_in(dtype));
297 }
298
299 #[test]
300 fn nested_field_two_level() {
301 let inner = DType::struct_(
302 [
303 ("inner_a", DType::Primitive(PType::U8, NonNullable)),
304 ("inner_b", DType::Bool(Nullable)),
305 ],
306 NonNullable,
307 );
308
309 let outer = DType::Struct(
310 StructFields::from_iter([("outer_a", DType::Bool(NonNullable)), ("outer_b", inner)]),
311 NonNullable,
312 );
313
314 let path = FieldPath::from_name("outer_b").push("inner_a");
315 let dtype = path.resolve(outer.clone()).unwrap();
316
317 assert_eq!(dtype, DType::Primitive(PType::U8, NonNullable));
318 assert!(path.exists_in(outer));
319 }
320
321 #[test]
322 fn nested_field_deep_nested() {
323 let level1 = DType::struct_(
324 [(
325 "a",
326 DType::struct_(
327 [(
328 "b",
329 DType::list(
330 DType::struct_(
331 [("c", DType::Primitive(PType::F64, Nullable))],
332 NonNullable,
333 ),
334 Nullable,
335 ),
336 )],
337 NonNullable,
338 ),
339 )],
340 NonNullable,
341 );
342
343 let path = FieldPath::from_name("a")
344 .push("b")
345 .push(Field::ElementType)
346 .push("c");
347 let dtype = path.resolve(level1.clone()).unwrap();
348
349 assert_eq!(dtype, DType::Primitive(PType::F64, Nullable));
350 assert!(path.exists_in(level1.clone()));
351
352 let path = FieldPath::from_name("a")
353 .push("b")
354 .push("c")
355 .push(Field::ElementType);
356 assert!(path.resolve(level1.clone()).is_none());
357 assert!(!path.exists_in(level1.clone()));
358
359 let path = FieldPath::from_name("a")
360 .push(Field::ElementType)
361 .push("b")
362 .push("c");
363 assert!(path.resolve(level1.clone()).is_none());
364 assert!(!path.exists_in(level1.clone()));
365
366 let path = FieldPath::root().push("a").push("b").push("c");
367 assert!(path.resolve(level1.clone()).is_none());
368 assert!(!path.exists_in(level1));
369 }
370
371 #[test]
372 fn nested_field_not_found() {
373 let dtype = DType::struct_([("a", DType::Bool(NonNullable))], NonNullable);
374 let path = field_path!(b);
375 assert!(path.resolve(dtype.clone()).is_none());
376 assert!(!path.exists_in(dtype.clone()));
377
378 let path = FieldPath::from(Field::ElementType);
379 assert!(path.resolve(dtype.clone()).is_none());
380 assert!(!path.exists_in(dtype));
381 }
382
383 #[test]
384 fn nested_field_non_struct_intermediate() {
385 let dtype = DType::struct_(
386 [("a", DType::Primitive(PType::I32, NonNullable))],
387 NonNullable,
388 );
389 let path = field_path!(a.b);
390 assert!(path.resolve(dtype.clone()).is_none());
391 assert!(!path.exists_in(dtype.clone()));
392
393 let path = FieldPath::from_name("a").push(Field::ElementType);
394 assert!(path.resolve(dtype.clone()).is_none());
395 assert!(!path.exists_in(dtype));
396 }
397
398 #[test]
399 fn test_overlap_positive() {
400 let path1 = field_path!(a.b.c);
401 let path2 = field_path!(a.b);
402 assert!(path1.overlap(&path2));
403 assert!(path2.overlap(&path1));
404
405 let path3 = field_path!(a);
406 assert!(path1.overlap(&path3));
407 assert!(path3.overlap(&path1));
408 }
409
410 #[test]
411 fn test_overlap_negative() {
412 let path1 = field_path!(a.b.c);
413 let path2 = field_path!(a.x.y);
414 assert!(!path1.overlap(&path2));
415 assert!(!path2.overlap(&path1));
416
417 let path3 = field_path!(x);
418 assert!(!path1.overlap(&path3));
419 assert!(!path3.overlap(&path1));
420 }
421}