Skip to main content

vortex_layout/
flatbuffers.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::env;
5use std::sync::LazyLock;
6
7use flatbuffers::FlatBufferBuilder;
8use flatbuffers::VerifierOptions;
9use flatbuffers::WIPOffset;
10use flatbuffers::root_with_opts;
11use vortex_array::dtype::DType;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_flatbuffers::FlatBuffer;
15use vortex_flatbuffers::FlatBufferRoot;
16use vortex_flatbuffers::WriteFlatBuffer;
17use vortex_flatbuffers::layout;
18use vortex_session::registry::ReadContext;
19
20use crate::Layout;
21use crate::LayoutContext;
22use crate::LayoutRef;
23use crate::children::ViewedLayoutChildren;
24use crate::segments::SegmentId;
25use crate::session::LayoutRegistry;
26
27static LAYOUT_VERIFIER: LazyLock<VerifierOptions> = LazyLock::new(|| {
28    VerifierOptions {
29        // Overridden
30        max_tables: env::var("VORTEX_MAX_LAYOUT_TABLES")
31            .ok()
32            .and_then(|lmt| lmt.parse::<usize>().ok())
33            .unwrap_or(1000000),
34        max_depth: env::var("VORTEX_MAX_LAYOUT_DEPTH")
35            .ok()
36            .and_then(|lmt| lmt.parse::<usize>().ok())
37            .unwrap_or(64),
38        // Defaults from flatbuffers
39        max_apparent_size: 1 << 31,
40        ignore_missing_null_terminator: false,
41    }
42});
43
44/// Parse a [`LayoutRef`] from a layout flatbuffer.
45pub fn layout_from_flatbuffer(
46    flatbuffer: FlatBuffer,
47    dtype: &DType,
48    layout_ctx: &ReadContext,
49    ctx: &ReadContext,
50    layouts: &LayoutRegistry,
51) -> VortexResult<LayoutRef> {
52    layout_from_flatbuffer_with_options(flatbuffer, dtype, layout_ctx, ctx, layouts, false)
53}
54
55/// Parse a [`LayoutRef`] from a layout flatbuffer with unknown-encoding behavior control.
56pub fn layout_from_flatbuffer_with_options(
57    flatbuffer: FlatBuffer,
58    dtype: &DType,
59    layout_ctx: &ReadContext,
60    ctx: &ReadContext,
61    layouts: &LayoutRegistry,
62    allow_unknown: bool,
63) -> VortexResult<LayoutRef> {
64    let fb_layout = root_with_opts::<layout::Layout>(&LAYOUT_VERIFIER, &flatbuffer)?;
65    let encoding_id = layout_ctx
66        .resolve(fb_layout.encoding())
67        .ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
68    let encoding = layouts.find(&encoding_id);
69
70    if encoding.is_none() && allow_unknown {
71        return foreign_layout_from_fb(fb_layout, dtype, layout_ctx);
72    }
73    let encoding =
74        encoding.ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
75
76    // SAFETY: we validate the flatbuffer above in the `root` call, and extract a loc.
77    let viewed_children = unsafe {
78        ViewedLayoutChildren::new_unchecked(
79            flatbuffer.clone(),
80            fb_layout._tab.loc(),
81            ctx.clone(),
82            layout_ctx.clone(),
83            layouts.clone(),
84            allow_unknown,
85        )
86    };
87
88    let layout = encoding.build(
89        dtype,
90        fb_layout.row_count(),
91        fb_layout
92            .metadata()
93            .map(|m| m.bytes())
94            .unwrap_or_else(|| &[]),
95        fb_layout
96            .segments()
97            .unwrap_or_default()
98            .iter()
99            .map(SegmentId::from)
100            .collect(),
101        &viewed_children,
102        ctx,
103    )?;
104
105    Ok(layout)
106}
107
108fn foreign_layout_from_fb(
109    fb_layout: layout::Layout<'_>,
110    dtype: &DType,
111    layout_ctx: &ReadContext,
112) -> VortexResult<LayoutRef> {
113    let encoding_id = layout_ctx
114        .resolve(fb_layout.encoding())
115        .ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
116
117    let children = fb_layout
118        .children()
119        .unwrap_or_default()
120        .iter()
121        .map(|child| foreign_layout_from_fb(child, dtype, layout_ctx))
122        .collect::<VortexResult<Vec<_>>>()?;
123
124    Ok(crate::layouts::foreign::new_foreign_layout(
125        encoding_id,
126        dtype.clone(),
127        fb_layout.row_count(),
128        fb_layout
129            .metadata()
130            .map(|m| m.bytes().to_vec())
131            .unwrap_or_default(),
132        fb_layout
133            .segments()
134            .unwrap_or_default()
135            .iter()
136            .map(SegmentId::from)
137            .collect(),
138        children,
139    ))
140}
141
142impl dyn Layout + '_ {
143    /// Serialize the layout into a [`FlatBufferBuilder`].
144    pub fn flatbuffer_writer<'a>(
145        &'a self,
146        ctx: &'a LayoutContext,
147    ) -> impl WriteFlatBuffer<Target<'a> = layout::Layout<'a>> + FlatBufferRoot + 'a {
148        LayoutFlatBufferWriter { layout: self, ctx }
149    }
150}
151
152/// An adapter struct for writing a layout to a FlatBuffer.
153struct LayoutFlatBufferWriter<'a> {
154    layout: &'a dyn Layout,
155    ctx: &'a LayoutContext,
156}
157
158impl FlatBufferRoot for LayoutFlatBufferWriter<'_> {}
159
160impl WriteFlatBuffer for LayoutFlatBufferWriter<'_> {
161    type Target<'fb> = layout::Layout<'fb>;
162
163    fn write_flatbuffer<'fb>(
164        &self,
165        fbb: &mut FlatBufferBuilder<'fb>,
166    ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
167        // First we recurse into the children and write them out
168        let child_layouts = self.layout.children()?;
169        let children = child_layouts
170            .iter()
171            .map(|layout| {
172                LayoutFlatBufferWriter {
173                    layout: layout.as_ref(),
174                    ctx: self.ctx,
175                }
176                .write_flatbuffer(fbb)
177            })
178            .collect::<VortexResult<Vec<_>>>()?;
179        let children = (!children.is_empty()).then(|| fbb.create_vector(&children));
180
181        // Next we write out the metadata if it's non-empty.
182        let metadata = self.layout.metadata();
183        let metadata = (!metadata.is_empty()).then(|| fbb.create_vector(&metadata));
184
185        let segments = self
186            .layout
187            .segment_ids()
188            .into_iter()
189            .map(|s| *s)
190            .collect::<Vec<_>>();
191        let segments = (!segments.is_empty()).then(|| fbb.create_vector(&segments));
192
193        // Dictionary-encode the layout ID
194        let encoding = self.ctx.intern(&self.layout.encoding_id()).ok_or_else(|| {
195            vortex_err!(
196                "Failed to intern layout encoding ID: {}",
197                self.layout.encoding_id()
198            )
199        })?;
200
201        Ok(layout::Layout::create(
202            fbb,
203            &layout::LayoutArgs {
204                encoding,
205                row_count: self.layout.row_count(),
206                metadata,
207                children,
208                segments,
209            },
210        ))
211    }
212}
213
#[cfg(test)]
mod tests {
    use flatbuffers::FlatBufferBuilder;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_buffer::ByteBuffer;
    use vortex_flatbuffers::FlatBuffer;
    use vortex_flatbuffers::layout as fbl;
    use vortex_session::registry::ReadContext;

    use super::layout_from_flatbuffer_with_options;
    use crate::LayoutEncodingId;
    use crate::session::LayoutSession;

    /// With `allow_unknown = true`, a layout tree whose encodings are not registered is
    /// returned as foreign layouts that preserve encoding IDs, row count, metadata, segments
    /// and children.
    #[test]
    fn unknown_layout_encoding_allow_unknown() {
        let mut builder = FlatBufferBuilder::new();

        // Child node: encoding index 1, one metadata byte, no children or segments.
        let child_metadata = builder.create_vector(&[9u8]);
        let child_args = fbl::LayoutArgs {
            encoding: 1,
            row_count: 3,
            metadata: Some(child_metadata),
            children: None,
            segments: None,
        };
        let child_offset = fbl::Layout::create(&mut builder, &child_args);

        // Root node: encoding index 0, three metadata bytes, one segment, one child.
        let root_children = builder.create_vector(&[child_offset]);
        let root_metadata = builder.create_vector(&[1u8, 2, 3]);
        let root_segments = builder.create_vector(&[7u32]);
        let root_args = fbl::LayoutArgs {
            encoding: 0,
            row_count: 10,
            metadata: Some(root_metadata),
            children: Some(root_children),
            segments: Some(root_segments),
        };
        let root_offset = fbl::Layout::create(&mut builder, &root_args);
        builder.finish_minimal(root_offset);

        let (bytes, start) = builder.collapse();
        let layout_buffer = FlatBuffer::align_from(ByteBuffer::from(bytes).slice(start..));

        // Both encoding indices resolve to IDs, but neither ID is used by a registered layout
        // in the default session.
        let layout_ctx = ReadContext::new([
            LayoutEncodingId::new_ref("vortex.test.foreign_layout"),
            LayoutEncodingId::new_ref("vortex.test.foreign_child_layout"),
        ]);
        let array_ctx = ReadContext::new([]);
        let layouts = LayoutSession::default().registry().clone();

        let root_layout = layout_from_flatbuffer_with_options(
            layout_buffer,
            &DType::Variant(Nullability::Nullable),
            &layout_ctx,
            &array_ctx,
            &layouts,
            true,
        )
        .unwrap();

        assert_eq!(
            root_layout.encoding_id().as_ref(),
            "vortex.test.foreign_layout"
        );
        assert_eq!(root_layout.row_count(), 10);
        assert_eq!(root_layout.metadata(), vec![1, 2, 3]);
        assert_eq!(root_layout.segment_ids().len(), 1);
        assert_eq!(*root_layout.segment_ids()[0], 7);
        assert_eq!(root_layout.nchildren(), 1);

        let child_layout = root_layout.child(0).unwrap();
        assert_eq!(
            child_layout.encoding_id().as_ref(),
            "vortex.test.foreign_child_layout"
        );
        assert_eq!(child_layout.metadata(), vec![9]);
    }
}