vortex_layout/layouts/struct_/
mod.rs

1mod reader;
2pub mod writer;
3
4use std::collections::BTreeSet;
5use std::sync::Arc;
6
7use reader::StructReader;
8use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
9use vortex_dtype::{DType, Field, FieldMask, StructDType};
10use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
11
12use crate::children::{LayoutChildren, OwnedLayoutChildren};
13use crate::segments::{SegmentId, SegmentSource};
14use crate::{
15    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
16};
17
// Invokes the crate-level `vtable!` macro for `Struct`.
// NOTE(review): `StructVTable` (implemented below) is not declared anywhere else in this file,
// so it is presumably generated by this macro — confirm against the macro definition.
18vtable!(Struct);
19
20impl VTable for StructVTable {
21    type Layout = StructLayout;
22    type Encoding = StructLayoutEncoding;
23    type Metadata = EmptyMetadata;
24
25    fn id(_encoding: &Self::Encoding) -> LayoutId {
26        LayoutId::new_ref("vortex.struct")
27    }
28
29    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
30        LayoutEncodingRef::new_ref(StructLayoutEncoding.as_ref())
31    }
32
33    fn row_count(layout: &Self::Layout) -> u64 {
34        layout.row_count
35    }
36
37    fn dtype(layout: &Self::Layout) -> &DType {
38        &layout.dtype
39    }
40
41    fn metadata(_layout: &Self::Layout) -> Self::Metadata {
42        EmptyMetadata
43    }
44
45    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
46        vec![]
47    }
48
49    fn nchildren(layout: &Self::Layout) -> usize {
50        layout.struct_dtype().nfields()
51    }
52
53    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
54        layout
55            .children
56            .child(idx, &layout.struct_dtype().field_by_index(idx)?)
57    }
58
59    fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
60        LayoutChildType::Field(
61            layout
62                .struct_dtype()
63                .field_name(idx)
64                .vortex_expect("Field index out of bounds")
65                .clone(),
66        )
67    }
68
69    fn register_splits(
70        layout: &Self::Layout,
71        field_mask: &[FieldMask],
72        row_offset: u64,
73        splits: &mut BTreeSet<u64>,
74    ) -> VortexResult<()> {
75        layout.matching_fields(field_mask, |mask, idx| {
76            layout
77                .child(idx)?
78                .register_splits(&[mask], row_offset, splits)
79        })
80    }
81
82    fn new_reader(
83        layout: &Self::Layout,
84        name: &Arc<str>,
85        segment_source: &Arc<dyn SegmentSource>,
86        ctx: &ArrayContext,
87    ) -> VortexResult<LayoutReaderRef> {
88        Ok(Arc::new(StructReader::try_new(
89            layout.clone(),
90            name.clone(),
91            segment_source.clone(),
92            ctx.clone(),
93        )?))
94    }
95
96    fn build(
97        _encoding: &Self::Encoding,
98        dtype: &DType,
99        row_count: u64,
100        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
101        _segment_ids: Vec<SegmentId>,
102        children: &dyn LayoutChildren,
103    ) -> VortexResult<Self::Layout> {
104        let struct_dt = dtype
105            .as_struct()
106            .ok_or_else(|| vortex_err!("Expected struct dtype"))?;
107        if children.nchildren() != struct_dt.nfields() {
108            vortex_bail!(
109                "Struct layout has {} children, but dtype has {} fields",
110                children.nchildren(),
111                struct_dt.nfields()
112            );
113        }
114        Ok(StructLayout {
115            row_count,
116            dtype: dtype.clone(),
117            children: children.to_arc(),
118        })
119    }
120}
121
/// Zero-sized marker type for the struct layout encoding.
///
/// It serves as the `Encoding` associated type of the `VTable` implementation in this module.
#[derive(Debug)]
pub struct StructLayoutEncoding;
124
125#[derive(Clone, Debug)]
126pub struct StructLayout {
127    row_count: u64,
128    dtype: DType,
129    children: Arc<dyn LayoutChildren>,
130}
131
132impl StructLayout {
133    pub fn new(row_count: u64, dtype: DType, children: Vec<LayoutRef>) -> Self {
134        Self {
135            row_count,
136            dtype,
137            children: OwnedLayoutChildren::layout_children(children),
138        }
139    }
140
141    pub fn struct_dtype(&self) -> &Arc<StructDType> {
142        let DType::Struct(dtype, _) = self.dtype() else {
143            vortex_panic!("Mismatched dtype {} for struct layout", self.dtype());
144        };
145        dtype
146    }
147
148    pub fn matching_fields<F>(&self, field_mask: &[FieldMask], mut per_child: F) -> VortexResult<()>
149    where
150        F: FnMut(FieldMask, usize) -> VortexResult<()>,
151    {
152        // If the field mask contains an `All` fields, then enumerate all fields.
153        if field_mask.iter().any(|mask| mask.matches_all()) {
154            for idx in 0..self.struct_dtype().nfields() {
155                per_child(FieldMask::All, idx)?;
156            }
157            return Ok(());
158        }
159
160        // Enumerate each field in the mask
161        for path in field_mask {
162            let Some(field) = path.starting_field()? else {
163                // skip fields not in mask
164                continue;
165            };
166            let Field::Name(field_name) = field else {
167                vortex_bail!("Expected field name, got {:?}", field);
168            };
169            let idx = self.struct_dtype().find(field_name)?;
170
171            per_child(path.clone().step_into()?, idx)?;
172        }
173
174        Ok(())
175    }
176}