Skip to main content

vortex_layout/layouts/struct_/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5
6use std::sync::Arc;
7
8use reader::StructReader;
9use vortex_array::DeserializeMetadata;
10use vortex_array::EmptyMetadata;
11use vortex_array::dtype::DType;
12use vortex_array::dtype::Field;
13use vortex_array::dtype::FieldMask;
14use vortex_array::dtype::Nullability;
15use vortex_array::dtype::StructFields;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_bail;
19use vortex_error::vortex_ensure;
20use vortex_error::vortex_err;
21use vortex_session::SessionExt;
22use vortex_session::VortexSession;
23use vortex_session::registry::ReadContext;
24
25use crate::LayoutChildType;
26use crate::LayoutEncodingRef;
27use crate::LayoutId;
28use crate::LayoutReaderRef;
29use crate::LayoutRef;
30use crate::VTable;
31use crate::children::LayoutChildren;
32use crate::children::OwnedLayoutChildren;
33use crate::segments::SegmentId;
34use crate::segments::SegmentSource;
35use crate::vtable;
36
// Introduces the `Struct` vtable type that `VTable` is implemented for in this module.
// NOTE(review): everything else `vtable!` generates is defined in `crate::vtable` — confirm there.
vtable!(Struct);
38
39impl VTable for Struct {
40    type Layout = StructLayout;
41    type Encoding = StructLayoutEncoding;
42    type Metadata = EmptyMetadata;
43
44    fn id(_encoding: &Self::Encoding) -> LayoutId {
45        LayoutId::new("vortex.struct")
46    }
47
48    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
49        LayoutEncodingRef::new_ref(StructLayoutEncoding.as_ref())
50    }
51
52    fn row_count(layout: &Self::Layout) -> u64 {
53        layout.row_count
54    }
55
56    fn dtype(layout: &Self::Layout) -> &DType {
57        &layout.dtype
58    }
59
60    fn metadata(_layout: &Self::Layout) -> Self::Metadata {
61        EmptyMetadata
62    }
63
64    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
65        vec![]
66    }
67
68    fn nchildren(layout: &Self::Layout) -> usize {
69        let validity_children = if layout.dtype.is_nullable() { 1 } else { 0 };
70        layout.struct_fields().nfields() + validity_children
71    }
72
73    fn child(layout: &Self::Layout, index: usize) -> VortexResult<LayoutRef> {
74        let schema_index = if layout.dtype.is_nullable() {
75            index.saturating_sub(1)
76        } else {
77            index
78        };
79
80        let child_dtype = if index == 0 && layout.dtype.is_nullable() {
81            DType::Bool(Nullability::NonNullable)
82        } else {
83            layout
84                .struct_fields()
85                .field_by_index(schema_index)
86                .ok_or_else(|| vortex_err!("Missing field {schema_index}"))?
87        };
88
89        layout.children.child(index, &child_dtype)
90    }
91
92    fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
93        let schema_index = if layout.dtype.is_nullable() {
94            idx.saturating_sub(1)
95        } else {
96            idx
97        };
98
99        if idx == 0 && layout.dtype.is_nullable() {
100            LayoutChildType::Auxiliary("validity".into())
101        } else {
102            LayoutChildType::Field(
103                layout
104                    .struct_fields()
105                    .field_name(schema_index)
106                    .vortex_expect("Field index out of bounds")
107                    .clone(),
108            )
109        }
110    }
111
112    fn new_reader(
113        layout: &Self::Layout,
114        name: Arc<str>,
115        segment_source: Arc<dyn SegmentSource>,
116        session: &VortexSession,
117        ctx: &crate::LayoutReaderContext,
118    ) -> VortexResult<LayoutReaderRef> {
119        Ok(Arc::new(StructReader::try_new(
120            layout.clone(),
121            name,
122            segment_source,
123            session.session(),
124            ctx.clone(),
125        )?))
126    }
127
128    fn build(
129        _encoding: &Self::Encoding,
130        dtype: &DType,
131        row_count: u64,
132        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
133        _segment_ids: Vec<SegmentId>,
134        children: &dyn LayoutChildren,
135        _ctx: &ReadContext,
136    ) -> VortexResult<Self::Layout> {
137        let struct_dt = dtype
138            .as_struct_fields_opt()
139            .ok_or_else(|| vortex_err!("Expected struct dtype"))?;
140
141        let expected_children = struct_dt.nfields() + (dtype.is_nullable() as usize);
142        vortex_ensure!(
143            children.nchildren() == expected_children,
144            "Struct layout has {} children, but dtype has {} fields",
145            children.nchildren(),
146            struct_dt.nfields()
147        );
148
149        Ok(StructLayout {
150            row_count,
151            dtype: dtype.clone(),
152            children: children.to_arc(),
153        })
154    }
155
156    fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
157        let struct_dt = layout
158            .dtype
159            .as_struct_fields_opt()
160            .ok_or_else(|| vortex_err!("Expected struct dtype"))?;
161
162        let expected_children = struct_dt.nfields() + (layout.dtype.is_nullable() as usize);
163        vortex_ensure!(
164            children.len() == expected_children,
165            "StructLayout expects {} children, got {}",
166            expected_children,
167            children.len()
168        );
169
170        layout.children = OwnedLayoutChildren::layout_children(children);
171        Ok(())
172    }
173}
174
/// Encoding type for the struct layout.
///
/// Serves as the `Encoding` associated type of the `Struct` vtable, whose layout ID is
/// `vortex.struct`. It is a unit struct and carries no state.
#[derive(Debug)]
pub struct StructLayoutEncoding;
177
/// Decomposes a struct-typed column into one child per field, enabling columnar projection.
///
/// Queries that only need a subset of fields can skip reading the rest entirely.
#[derive(Clone, Debug)]
pub struct StructLayout {
    // Row count reported for this layout.
    row_count: u64,
    // Dtype of the column; `struct_fields()` expects this to be a struct dtype.
    dtype: DType,
    // Child layouts. When `dtype` is nullable, child 0 is the validity child (non-nullable
    // `Bool`) and the struct fields follow in schema order; otherwise child `i` is field `i`.
    children: Arc<dyn LayoutChildren>,
}
187
188impl StructLayout {
189    pub fn new(row_count: u64, dtype: DType, children: Vec<LayoutRef>) -> Self {
190        Self {
191            row_count,
192            dtype,
193            children: OwnedLayoutChildren::layout_children(children),
194        }
195    }
196
197    pub fn struct_fields(&self) -> &StructFields {
198        self.dtype
199            .as_struct_fields_opt()
200            .vortex_expect("Struct layout dtype must be a struct")
201    }
202
203    #[inline]
204    pub fn row_count(&self) -> u64 {
205        self.row_count
206    }
207
208    #[inline]
209    pub fn children(&self) -> &Arc<dyn LayoutChildren> {
210        &self.children
211    }
212
213    pub fn matching_fields<F>(&self, field_mask: &[FieldMask], mut per_child: F) -> VortexResult<()>
214    where
215        F: FnMut(FieldMask, usize) -> VortexResult<()>,
216    {
217        // If the field mask contains an `All` fields, then enumerate all fields.
218        if field_mask.iter().any(|mask| mask.matches_all()) {
219            for idx in 0..self.struct_fields().nfields() {
220                per_child(FieldMask::All, idx)?;
221            }
222            return Ok(());
223        }
224
225        // Enumerate each field in the mask
226        for path in field_mask {
227            let Some(field) = path.starting_field()? else {
228                // skip fields not in mask
229                continue;
230            };
231            let Field::Name(field_name) = field else {
232                vortex_bail!("Expected field name, got {field:?}");
233            };
234            let idx = self
235                .struct_fields()
236                .find(field_name)
237                .ok_or_else(|| vortex_err!("Field not found: {field_name}"))?;
238
239            per_child(path.clone().step_into()?, idx)?;
240        }
241
242        Ok(())
243    }
244}