Skip to main content

vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use flatbuffers::Follow;
9use itertools::Itertools;
10use vortex_array::dtype::DType;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_flatbuffers::FlatBuffer;
15use vortex_flatbuffers::layout as fbl;
16use vortex_session::registry::ReadContext;
17
18use crate::LayoutRef;
19use crate::layouts::foreign::new_foreign_layout;
20use crate::segments::SegmentId;
21use crate::session::LayoutRegistry;
22
23/// Abstract way of accessing the children of a layout.
24///
25/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
26/// layout trees.
27pub trait LayoutChildren: 'static + Send + Sync {
28    fn to_arc(&self) -> Arc<dyn LayoutChildren>;
29
30    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;
31
32    fn child_row_count(&self, idx: usize) -> u64;
33
34    fn nchildren(&self) -> usize;
35}
36
37impl Debug for dyn LayoutChildren {
38    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
39        f.debug_struct("LayoutChildren")
40            .field("nchildren", &self.nchildren())
41            .finish()
42    }
43}
44
45impl LayoutChildren for Arc<dyn LayoutChildren> {
46    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
47        Arc::clone(self)
48    }
49
50    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
51        self.as_ref().child(idx, dtype)
52    }
53
54    fn child_row_count(&self, idx: usize) -> u64 {
55        self.as_ref().child_row_count(idx)
56    }
57
58    fn nchildren(&self) -> usize {
59        self.as_ref().nchildren()
60    }
61}
62
63/// An implementation of [`LayoutChildren`] for in-memory owned children.
64/// See also [`ViewLayoutChildren`] for lazily deserialized children from flatbuffers.
65#[derive(Clone)]
66pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
67
68impl OwnedLayoutChildren {
69    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
70        Arc::new(Self(children))
71    }
72}
73
74/// In-memory implementation of [`LayoutChildren`].
75impl LayoutChildren for OwnedLayoutChildren {
76    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
77        Arc::new(self.clone())
78    }
79
80    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
81        if idx >= self.0.len() {
82            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
83        }
84        let child = &self.0[idx];
85        if child.dtype() != dtype {
86            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
87        }
88        Ok(Arc::clone(child))
89    }
90
91    fn child_row_count(&self, idx: usize) -> u64 {
92        self.0[idx].row_count()
93    }
94
95    fn nchildren(&self) -> usize {
96        self.0.len()
97    }
98}
99
100#[derive(Clone)]
101pub(crate) struct ViewedLayoutChildren {
102    flatbuffer: FlatBuffer,
103    flatbuffer_loc: usize,
104    array_read_ctx: ReadContext,
105    layout_read_ctx: ReadContext,
106    layouts: LayoutRegistry,
107    allow_unknown: bool,
108}
109
110impl ViewedLayoutChildren {
111    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
112    ///
113    /// # Safety
114    ///
115    /// Assumes the flatbuffer is validated and that the `flatbuffer_loc` is the correct offset
116    pub(super) unsafe fn new_unchecked(
117        flatbuffer: FlatBuffer,
118        flatbuffer_loc: usize,
119        array_read_ctx: ReadContext,
120        layout_read_ctx: ReadContext,
121        layouts: LayoutRegistry,
122        allow_unknown: bool,
123    ) -> Self {
124        Self {
125            flatbuffer,
126            flatbuffer_loc,
127            array_read_ctx,
128            layout_read_ctx,
129            layouts,
130            allow_unknown,
131        }
132    }
133
134    /// Return the flatbuffer layout message.
135    fn flatbuffer(&self) -> fbl::Layout<'_> {
136        // SAFETY: flatbuffer_loc is guaranteed to be a valid offset into the flatbuffer
137        // as it was constructed from a validated flatbuffer in ViewedLayoutChildren::try_new.
138        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
139        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
140    }
141
142    fn foreign_layout_from_fb(
143        &self,
144        fb_layout: fbl::Layout<'_>,
145        dtype: &DType,
146    ) -> VortexResult<LayoutRef> {
147        let encoding_id = self
148            .layout_read_ctx
149            .resolve(fb_layout.encoding())
150            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_layout.encoding()))?;
151
152        let children = fb_layout
153            .children()
154            .unwrap_or_default()
155            .iter()
156            .map(|child| self.foreign_layout_from_fb(child, dtype))
157            .collect::<VortexResult<Vec<_>>>()?;
158
159        Ok(new_foreign_layout(
160            encoding_id,
161            dtype.clone(),
162            fb_layout.row_count(),
163            fb_layout
164                .metadata()
165                .map(|m| m.bytes().to_vec())
166                .unwrap_or_default(),
167            fb_layout
168                .segments()
169                .unwrap_or_default()
170                .iter()
171                .map(SegmentId::from)
172                .collect_vec(),
173            children,
174        ))
175    }
176}
177
178impl LayoutChildren for ViewedLayoutChildren {
179    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
180        Arc::new(self.clone())
181    }
182
183    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
184        if idx >= self.nchildren() {
185            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
186        }
187        let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
188
189        let viewed_children = ViewedLayoutChildren {
190            flatbuffer: self.flatbuffer.clone(),
191            flatbuffer_loc: fb_child._tab.loc(),
192            array_read_ctx: self.array_read_ctx.clone(),
193            layout_read_ctx: self.layout_read_ctx.clone(),
194            layouts: self.layouts.clone(),
195            allow_unknown: self.allow_unknown,
196        };
197
198        let encoding_id = self
199            .layout_read_ctx
200            .resolve(fb_child.encoding())
201            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
202        let Some(encoding) = self.layouts.find(&encoding_id) else {
203            if self.allow_unknown {
204                return viewed_children.foreign_layout_from_fb(fb_child, dtype);
205            }
206            return Err(vortex_err!(
207                "Encoding not found in registry: {}",
208                fb_child.encoding()
209            ));
210        };
211
212        encoding.build(
213            dtype,
214            fb_child.row_count(),
215            fb_child
216                .metadata()
217                .map(|m| m.bytes())
218                .unwrap_or_else(|| &[]),
219            fb_child
220                .segments()
221                .unwrap_or_default()
222                .iter()
223                .map(SegmentId::from)
224                .collect_vec(),
225            &viewed_children,
226            &self.array_read_ctx,
227        )
228    }
229
230    fn child_row_count(&self, idx: usize) -> u64 {
231        // Efficiently get the row count of the child at the given index, without a full
232        // deserialization.
233        self.flatbuffer()
234            .children()
235            .unwrap_or_default()
236            .get(idx)
237            .row_count()
238    }
239
240    fn nchildren(&self) -> usize {
241        self.flatbuffer().children().unwrap_or_default().len()
242    }
243}