Skip to main content

vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use flatbuffers::Follow;
9use itertools::Itertools;
10use once_cell::sync::OnceCell;
11use vortex_array::dtype::DType;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_err;
15use vortex_flatbuffers::FlatBuffer;
16use vortex_flatbuffers::layout as fbl;
17use vortex_session::registry::ReadContext;
18
19use crate::LayoutRef;
20use crate::layouts::foreign::new_foreign_layout;
21use crate::segments::SegmentId;
22use crate::session::LayoutRegistry;
23
24/// Abstract way of accessing the children of a layout.
25///
26/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
27/// layout trees.
28pub trait LayoutChildren: 'static + Send + Sync {
29    fn to_arc(&self) -> Arc<dyn LayoutChildren>;
30
31    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;
32
33    fn child_row_count(&self, idx: usize) -> u64;
34
35    fn nchildren(&self) -> usize;
36}
37
38impl Debug for dyn LayoutChildren {
39    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
40        f.debug_struct("LayoutChildren")
41            .field("nchildren", &self.nchildren())
42            .finish()
43    }
44}
45
46impl LayoutChildren for Arc<dyn LayoutChildren> {
47    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
48        Arc::clone(self)
49    }
50
51    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
52        self.as_ref().child(idx, dtype)
53    }
54
55    fn child_row_count(&self, idx: usize) -> u64 {
56        self.as_ref().child_row_count(idx)
57    }
58
59    fn nchildren(&self) -> usize {
60        self.as_ref().nchildren()
61    }
62}
63
64/// An implementation of [`LayoutChildren`] for in-memory owned children.
65#[derive(Clone)]
66pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
67
68impl OwnedLayoutChildren {
69    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
70        Arc::new(Self(children))
71    }
72}
73
74/// In-memory implementation of [`LayoutChildren`].
75impl LayoutChildren for OwnedLayoutChildren {
76    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
77        Arc::new(self.clone())
78    }
79
80    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
81        if idx >= self.0.len() {
82            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
83        }
84        let child = &self.0[idx];
85        if child.dtype() != dtype {
86            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
87        }
88        Ok(Arc::clone(child))
89    }
90
91    fn child_row_count(&self, idx: usize) -> u64 {
92        self.0[idx].row_count()
93    }
94
95    fn nchildren(&self) -> usize {
96        self.0.len()
97    }
98}
99
100#[derive(Clone)]
101pub(crate) struct ViewedLayoutChildren {
102    flatbuffer: FlatBuffer,
103    flatbuffer_loc: usize,
104    array_read_ctx: ReadContext,
105    layout_read_ctx: ReadContext,
106    layouts: LayoutRegistry,
107    allow_unknown: bool,
108    cache: Arc<[OnceCell<LayoutRef>]>,
109}
110
111impl ViewedLayoutChildren {
112    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
113    ///
114    /// # Safety
115    ///
116    /// Assumes the flatbuffer is validated and that the `flatbuffer_loc` is the correct offset
117    pub(super) unsafe fn new_unchecked(
118        flatbuffer: FlatBuffer,
119        flatbuffer_loc: usize,
120        array_read_ctx: ReadContext,
121        layout_read_ctx: ReadContext,
122        layouts: LayoutRegistry,
123        allow_unknown: bool,
124    ) -> Self {
125        // SAFETY: guaranteed by caller
126        let nchildren = unsafe { fbl::Layout::follow(flatbuffer.as_ref(), flatbuffer_loc) }
127            .children()
128            .unwrap_or_default()
129            .len();
130        let cache = vec![OnceCell::new(); nchildren].into_boxed_slice().into();
131        Self {
132            flatbuffer,
133            flatbuffer_loc,
134            array_read_ctx,
135            layout_read_ctx,
136            layouts,
137            allow_unknown,
138            cache,
139        }
140    }
141
142    /// Return the flatbuffer layout message.
143    fn flatbuffer(&self) -> fbl::Layout<'_> {
144        // SAFETY: flatbuffer_loc is guaranteed to be a valid offset into the flatbuffer
145        // as it was constructed from a validated flatbuffer in ViewedLayoutChildren::try_new.
146        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
147        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
148    }
149
150    fn foreign_layout_from_fb(
151        &self,
152        fb_layout: fbl::Layout<'_>,
153        dtype: &DType,
154    ) -> VortexResult<LayoutRef> {
155        let encoding_id = self
156            .layout_read_ctx
157            .resolve(fb_layout.encoding())
158            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_layout.encoding()))?;
159
160        let children = fb_layout
161            .children()
162            .unwrap_or_default()
163            .iter()
164            .map(|child| self.foreign_layout_from_fb(child, dtype))
165            .collect::<VortexResult<Vec<_>>>()?;
166
167        Ok(new_foreign_layout(
168            encoding_id,
169            dtype.clone(),
170            fb_layout.row_count(),
171            fb_layout
172                .metadata()
173                .map(|m| m.bytes().to_vec())
174                .unwrap_or_default(),
175            fb_layout
176                .segments()
177                .unwrap_or_default()
178                .iter()
179                .map(SegmentId::from)
180                .collect_vec(),
181            children,
182        ))
183    }
184}
185
186impl LayoutChildren for ViewedLayoutChildren {
187    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
188        Arc::new(self.clone())
189    }
190
191    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
192        if idx >= self.nchildren() {
193            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
194        }
195
196        let layout_ref = self.cache[idx].get_or_try_init(|| {
197            let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
198
199            // SAFETY: same validated flatbuffer; fb_child._tab.loc() is a valid offset
200            // We need this to avoid re-initializing cache here
201            let viewed_children = unsafe {
202                ViewedLayoutChildren::new_unchecked(
203                    self.flatbuffer.clone(),
204                    fb_child._tab.loc(),
205                    self.array_read_ctx.clone(),
206                    self.layout_read_ctx.clone(),
207                    self.layouts.clone(),
208                    self.allow_unknown,
209                )
210            };
211
212            let encoding_id = self
213                .layout_read_ctx
214                .resolve(fb_child.encoding())
215                .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
216            let Some(encoding) = self.layouts.find(&encoding_id) else {
217                if self.allow_unknown {
218                    return viewed_children.foreign_layout_from_fb(fb_child, dtype);
219                }
220                return Err(vortex_err!(
221                    "Encoding not found in registry: {}",
222                    fb_child.encoding()
223                ));
224            };
225
226            encoding.build(
227                dtype,
228                fb_child.row_count(),
229                fb_child
230                    .metadata()
231                    .map(|m| m.bytes())
232                    .unwrap_or_else(|| &[]),
233                fb_child
234                    .segments()
235                    .unwrap_or_default()
236                    .iter()
237                    .map(SegmentId::from)
238                    .collect_vec(),
239                &viewed_children,
240                &self.array_read_ctx,
241            )
242        })?;
243        Ok(Arc::clone(layout_ref))
244    }
245
246    fn child_row_count(&self, idx: usize) -> u64 {
247        // Efficiently get the row count of the child at the given index, without a full
248        // deserialization.
249        self.flatbuffer()
250            .children()
251            .unwrap_or_default()
252            .get(idx)
253            .row_count()
254    }
255
256    fn nchildren(&self) -> usize {
257        self.cache.len()
258    }
259}