Skip to main content

vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use flatbuffers::Follow;
9use itertools::Itertools;
10use vortex_array::dtype::DType;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_flatbuffers::FlatBuffer;
15use vortex_flatbuffers::layout as fbl;
16use vortex_session::registry::ReadContext;
17
18use crate::LayoutRef;
19use crate::segments::SegmentId;
20use crate::session::LayoutRegistry;
21
22/// Abstract way of accessing the children of a layout.
23///
24/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
25/// layout trees.
26pub trait LayoutChildren: 'static + Send + Sync {
27    fn to_arc(&self) -> Arc<dyn LayoutChildren>;
28
29    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;
30
31    fn child_row_count(&self, idx: usize) -> u64;
32
33    fn nchildren(&self) -> usize;
34}
35
36impl Debug for dyn LayoutChildren {
37    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38        f.debug_struct("LayoutChildren")
39            .field("nchildren", &self.nchildren())
40            .finish()
41    }
42}
43
44impl LayoutChildren for Arc<dyn LayoutChildren> {
45    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
46        Arc::clone(self)
47    }
48
49    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
50        self.as_ref().child(idx, dtype)
51    }
52
53    fn child_row_count(&self, idx: usize) -> u64 {
54        self.as_ref().child_row_count(idx)
55    }
56
57    fn nchildren(&self) -> usize {
58        self.as_ref().nchildren()
59    }
60}
61
62/// An implementation of [`LayoutChildren`] for in-memory owned children.
63/// See also [`ViewLayoutChildren`] for lazily deserialized children from flatbuffers.
64#[derive(Clone)]
65pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
66
67impl OwnedLayoutChildren {
68    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
69        Arc::new(Self(children))
70    }
71}
72
73/// In-memory implementation of [`LayoutChildren`].
74impl LayoutChildren for OwnedLayoutChildren {
75    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
76        Arc::new(self.clone())
77    }
78
79    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
80        if idx >= self.0.len() {
81            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
82        }
83        let child = &self.0[idx];
84        if child.dtype() != dtype {
85            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
86        }
87        Ok(Arc::clone(child))
88    }
89
90    fn child_row_count(&self, idx: usize) -> u64 {
91        self.0[idx].row_count()
92    }
93
94    fn nchildren(&self) -> usize {
95        self.0.len()
96    }
97}
98
99#[derive(Clone)]
100pub(crate) struct ViewedLayoutChildren {
101    flatbuffer: FlatBuffer,
102    flatbuffer_loc: usize,
103    array_read_ctx: ReadContext,
104    layout_read_ctx: ReadContext,
105    layouts: LayoutRegistry,
106    allow_unknown: bool,
107}
108
109impl ViewedLayoutChildren {
110    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
111    ///
112    /// # Safety
113    ///
114    /// Assumes the flatbuffer is validated and that the `flatbuffer_loc` is the correct offset
115    pub(super) unsafe fn new_unchecked(
116        flatbuffer: FlatBuffer,
117        flatbuffer_loc: usize,
118        array_read_ctx: ReadContext,
119        layout_read_ctx: ReadContext,
120        layouts: LayoutRegistry,
121        allow_unknown: bool,
122    ) -> Self {
123        Self {
124            flatbuffer,
125            flatbuffer_loc,
126            array_read_ctx,
127            layout_read_ctx,
128            layouts,
129            allow_unknown,
130        }
131    }
132
133    /// Return the flatbuffer layout message.
134    fn flatbuffer(&self) -> fbl::Layout<'_> {
135        // SAFETY: flatbuffer_loc is guaranteed to be a valid offset into the flatbuffer
136        // as it was constructed from a validated flatbuffer in ViewedLayoutChildren::try_new.
137        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
138        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
139    }
140
141    fn foreign_layout_from_fb(
142        &self,
143        fb_layout: fbl::Layout<'_>,
144        dtype: &DType,
145    ) -> VortexResult<LayoutRef> {
146        let encoding_id = self
147            .layout_read_ctx
148            .resolve(fb_layout.encoding())
149            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_layout.encoding()))?;
150
151        let children = fb_layout
152            .children()
153            .unwrap_or_default()
154            .iter()
155            .map(|child| self.foreign_layout_from_fb(child, dtype))
156            .collect::<VortexResult<Vec<_>>>()?;
157
158        Ok(crate::layouts::foreign::new_foreign_layout(
159            encoding_id,
160            dtype.clone(),
161            fb_layout.row_count(),
162            fb_layout
163                .metadata()
164                .map(|m| m.bytes().to_vec())
165                .unwrap_or_default(),
166            fb_layout
167                .segments()
168                .unwrap_or_default()
169                .iter()
170                .map(SegmentId::from)
171                .collect_vec(),
172            children,
173        ))
174    }
175}
176
177impl LayoutChildren for ViewedLayoutChildren {
178    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
179        Arc::new(self.clone())
180    }
181
182    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
183        if idx >= self.nchildren() {
184            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
185        }
186        let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
187
188        let viewed_children = ViewedLayoutChildren {
189            flatbuffer: self.flatbuffer.clone(),
190            flatbuffer_loc: fb_child._tab.loc(),
191            array_read_ctx: self.array_read_ctx.clone(),
192            layout_read_ctx: self.layout_read_ctx.clone(),
193            layouts: self.layouts.clone(),
194            allow_unknown: self.allow_unknown,
195        };
196
197        let encoding_id = self
198            .layout_read_ctx
199            .resolve(fb_child.encoding())
200            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
201        let Some(encoding) = self.layouts.find(&encoding_id) else {
202            if self.allow_unknown {
203                return viewed_children.foreign_layout_from_fb(fb_child, dtype);
204            }
205            return Err(vortex_err!(
206                "Encoding not found in registry: {}",
207                fb_child.encoding()
208            ));
209        };
210
211        encoding.build(
212            dtype,
213            fb_child.row_count(),
214            fb_child
215                .metadata()
216                .map(|m| m.bytes())
217                .unwrap_or_else(|| &[]),
218            fb_child
219                .segments()
220                .unwrap_or_default()
221                .iter()
222                .map(SegmentId::from)
223                .collect_vec(),
224            &viewed_children,
225            &self.array_read_ctx,
226        )
227    }
228
229    fn child_row_count(&self, idx: usize) -> u64 {
230        // Efficiently get the row count of the child at the given index, without a full
231        // deserialization.
232        self.flatbuffer()
233            .children()
234            .unwrap_or_default()
235            .get(idx)
236            .row_count()
237    }
238
239    fn nchildren(&self) -> usize {
240        self.flatbuffer().children().unwrap_or_default().len()
241    }
242}