Skip to main content

vortex_layout/
flatbuffers.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::env;
5use std::sync::LazyLock;
6
7use flatbuffers::FlatBufferBuilder;
8use flatbuffers::VerifierOptions;
9use flatbuffers::WIPOffset;
10use flatbuffers::root_with_opts;
11use vortex_array::dtype::DType;
12use vortex_error::VortexResult;
13use vortex_error::vortex_err;
14use vortex_flatbuffers::FlatBuffer;
15use vortex_flatbuffers::FlatBufferRoot;
16use vortex_flatbuffers::WriteFlatBuffer;
17use vortex_flatbuffers::layout;
18use vortex_session::registry::ReadContext;
19
20use crate::Layout;
21use crate::LayoutContext;
22use crate::LayoutRef;
23use crate::children::ViewedLayoutChildren;
24use crate::layouts::foreign::new_foreign_layout;
25use crate::segments::SegmentId;
26use crate::session::LayoutRegistry;
27
28static LAYOUT_VERIFIER: LazyLock<VerifierOptions> = LazyLock::new(|| {
29    VerifierOptions {
30        // Overridden
31        max_tables: env::var("VORTEX_MAX_LAYOUT_TABLES")
32            .ok()
33            .and_then(|lmt| lmt.parse::<usize>().ok())
34            .unwrap_or(1000000),
35        max_depth: env::var("VORTEX_MAX_LAYOUT_DEPTH")
36            .ok()
37            .and_then(|lmt| lmt.parse::<usize>().ok())
38            .unwrap_or(64),
39        // Defaults from flatbuffers
40        max_apparent_size: 1 << 31,
41        ignore_missing_null_terminator: false,
42    }
43});
44
45/// Parse a [`LayoutRef`] from a layout flatbuffer.
46pub fn layout_from_flatbuffer(
47    flatbuffer: FlatBuffer,
48    dtype: &DType,
49    layout_ctx: &ReadContext,
50    ctx: &ReadContext,
51    layouts: &LayoutRegistry,
52) -> VortexResult<LayoutRef> {
53    layout_from_flatbuffer_with_options(flatbuffer, dtype, layout_ctx, ctx, layouts, false)
54}
55
56/// Parse a [`LayoutRef`] from a layout flatbuffer with unknown-encoding behavior control.
57pub fn layout_from_flatbuffer_with_options(
58    flatbuffer: FlatBuffer,
59    dtype: &DType,
60    layout_ctx: &ReadContext,
61    ctx: &ReadContext,
62    layouts: &LayoutRegistry,
63    allow_unknown: bool,
64) -> VortexResult<LayoutRef> {
65    let fb_layout = root_with_opts::<layout::Layout>(&LAYOUT_VERIFIER, &flatbuffer)?;
66    let encoding_id = layout_ctx
67        .resolve(fb_layout.encoding())
68        .ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
69    let encoding = layouts.find(&encoding_id);
70
71    if encoding.is_none() && allow_unknown {
72        return foreign_layout_from_fb(fb_layout, dtype, layout_ctx);
73    }
74    let encoding =
75        encoding.ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
76
77    // SAFETY: we validate the flatbuffer above in the `root` call, and extract a loc.
78    let viewed_children = unsafe {
79        ViewedLayoutChildren::new_unchecked(
80            flatbuffer.clone(),
81            fb_layout._tab.loc(),
82            ctx.clone(),
83            layout_ctx.clone(),
84            layouts.clone(),
85            allow_unknown,
86        )
87    };
88
89    let layout = encoding.build(
90        dtype,
91        fb_layout.row_count(),
92        fb_layout
93            .metadata()
94            .map(|m| m.bytes())
95            .unwrap_or_else(|| &[]),
96        fb_layout
97            .segments()
98            .unwrap_or_default()
99            .iter()
100            .map(SegmentId::from)
101            .collect(),
102        &viewed_children,
103        ctx,
104    )?;
105
106    Ok(layout)
107}
108
109fn foreign_layout_from_fb(
110    fb_layout: layout::Layout<'_>,
111    dtype: &DType,
112    layout_ctx: &ReadContext,
113) -> VortexResult<LayoutRef> {
114    let encoding_id = layout_ctx
115        .resolve(fb_layout.encoding())
116        .ok_or_else(|| vortex_err!("Invalid encoding ID: {}", fb_layout.encoding()))?;
117
118    let children = fb_layout
119        .children()
120        .unwrap_or_default()
121        .iter()
122        .map(|child| foreign_layout_from_fb(child, dtype, layout_ctx))
123        .collect::<VortexResult<Vec<_>>>()?;
124
125    Ok(new_foreign_layout(
126        encoding_id,
127        dtype.clone(),
128        fb_layout.row_count(),
129        fb_layout
130            .metadata()
131            .map(|m| m.bytes().to_vec())
132            .unwrap_or_default(),
133        fb_layout
134            .segments()
135            .unwrap_or_default()
136            .iter()
137            .map(SegmentId::from)
138            .collect(),
139        children,
140    ))
141}
142
143impl dyn Layout + '_ {
144    /// Serialize the layout into a [`FlatBufferBuilder`].
145    pub fn flatbuffer_writer<'a>(
146        &'a self,
147        ctx: &'a LayoutContext,
148    ) -> impl WriteFlatBuffer<Target<'a> = layout::Layout<'a>> + FlatBufferRoot + 'a {
149        LayoutFlatBufferWriter { layout: self, ctx }
150    }
151}
152
153/// An adapter struct for writing a layout to a FlatBuffer.
154struct LayoutFlatBufferWriter<'a> {
155    layout: &'a dyn Layout,
156    ctx: &'a LayoutContext,
157}
158
159impl FlatBufferRoot for LayoutFlatBufferWriter<'_> {}
160
161impl WriteFlatBuffer for LayoutFlatBufferWriter<'_> {
162    type Target<'fb> = layout::Layout<'fb>;
163
164    fn write_flatbuffer<'fb>(
165        &self,
166        fbb: &mut FlatBufferBuilder<'fb>,
167    ) -> VortexResult<WIPOffset<Self::Target<'fb>>> {
168        // First we recurse into the children and write them out
169        let child_layouts = self.layout.children()?;
170        let children = child_layouts
171            .iter()
172            .map(|layout| {
173                LayoutFlatBufferWriter {
174                    layout: layout.as_ref(),
175                    ctx: self.ctx,
176                }
177                .write_flatbuffer(fbb)
178            })
179            .collect::<VortexResult<Vec<_>>>()?;
180        let children = (!children.is_empty()).then(|| fbb.create_vector(&children));
181
182        // Next we write out the metadata if it's non-empty.
183        let metadata = self.layout.metadata();
184        let metadata = (!metadata.is_empty()).then(|| fbb.create_vector(&metadata));
185
186        let segments = self
187            .layout
188            .segment_ids()
189            .into_iter()
190            .map(|s| *s)
191            .collect::<Vec<_>>();
192        let segments = (!segments.is_empty()).then(|| fbb.create_vector(&segments));
193
194        // Dictionary-encode the layout ID
195        let encoding = self.ctx.intern(&self.layout.encoding_id()).ok_or_else(|| {
196            vortex_err!(
197                "Failed to intern layout encoding ID: {}",
198                self.layout.encoding_id()
199            )
200        })?;
201
202        Ok(layout::Layout::create(
203            fbb,
204            &layout::LayoutArgs {
205                encoding,
206                row_count: self.layout.row_count(),
207                metadata,
208                children,
209                segments,
210            },
211        ))
212    }
213}
214
#[cfg(test)]
mod tests {
    use flatbuffers::FlatBufferBuilder;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_flatbuffers::layout as fbl;
    use vortex_session::registry::ReadContext;

    use super::layout_from_flatbuffer_with_options;
    use crate::LayoutEncodingId;
    use crate::session::LayoutSession;

    /// With `allow_unknown` set, layouts whose encodings are not registered are still parsed,
    /// keeping their row count, metadata, segments, and children.
    #[test]
    fn unknown_layout_encoding_allow_unknown() {
        let mut builder = FlatBufferBuilder::new();

        // Leaf layout: encoding index 1, one byte of metadata, no children or segments.
        let leaf_metadata = builder.create_vector(&[9u8]);
        let leaf_args = fbl::LayoutArgs {
            encoding: 1,
            row_count: 3,
            metadata: Some(leaf_metadata),
            children: None,
            segments: None,
        };
        let leaf = fbl::Layout::create(&mut builder, &leaf_args);

        // Root layout: encoding index 0, with metadata, one segment, and the leaf as its child.
        let root_children = builder.create_vector(&[leaf]);
        let root_metadata = builder.create_vector(&[1u8, 2, 3]);
        let root_segments = builder.create_vector(&[7u32]);
        let root_args = fbl::LayoutArgs {
            encoding: 0,
            row_count: 10,
            metadata: Some(root_metadata),
            children: Some(root_children),
            segments: Some(root_segments),
        };
        let root = fbl::Layout::create(&mut builder, &root_args);
        builder.finish_minimal(root);

        let (bytes, offset) = builder.collapse();
        let layout_buffer = vortex_flatbuffers::FlatBuffer::align_from(
            vortex_buffer::ByteBuffer::from(bytes).slice(offset..),
        );

        // Both encoding indices resolve to IDs, but neither ID is registered in the session.
        let layout_ctx = ReadContext::new([
            LayoutEncodingId::new("vortex.test.foreign_layout"),
            LayoutEncodingId::new("vortex.test.foreign_child_layout"),
        ]);
        let array_ctx = ReadContext::new([]);
        let layouts = LayoutSession::default().registry().clone();

        let parsed = layout_from_flatbuffer_with_options(
            layout_buffer,
            &DType::Variant(Nullability::Nullable),
            &layout_ctx,
            &array_ctx,
            &layouts,
            true,
        )
        .unwrap();

        assert_eq!(parsed.encoding_id().as_ref(), "vortex.test.foreign_layout");
        assert_eq!(parsed.row_count(), 10);
        assert_eq!(parsed.metadata(), vec![1, 2, 3]);
        assert_eq!(parsed.segment_ids().len(), 1);
        assert_eq!(*parsed.segment_ids()[0], 7);
        assert_eq!(parsed.nchildren(), 1);

        let parsed_child = parsed.child(0).unwrap();
        assert_eq!(
            parsed_child.encoding_id().as_ref(),
            "vortex.test.foreign_child_layout"
        );
        assert_eq!(parsed_child.metadata(), vec![9]);
    }
}