Skip to main content

vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use flatbuffers::Follow;
9use itertools::Itertools;
10use vortex_array::dtype::DType;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_flatbuffers::FlatBuffer;
15use vortex_flatbuffers::layout as fbl;
16use vortex_session::registry::ReadContext;
17
18use crate::LayoutRef;
19use crate::segments::SegmentId;
20use crate::session::LayoutRegistry;
21
22/// Abstract way of accessing the children of a layout.
23///
24/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
25/// layout trees.
26pub trait LayoutChildren: 'static + Send + Sync {
27    fn to_arc(&self) -> Arc<dyn LayoutChildren>;
28
29    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;
30
31    fn child_row_count(&self, idx: usize) -> u64;
32
33    fn nchildren(&self) -> usize;
34}
35
36impl Debug for dyn LayoutChildren {
37    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38        f.debug_struct("LayoutChildren")
39            .field("nchildren", &self.nchildren())
40            .finish()
41    }
42}
43
44impl LayoutChildren for Arc<dyn LayoutChildren> {
45    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
46        self.clone()
47    }
48
49    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
50        self.as_ref().child(idx, dtype)
51    }
52
53    fn child_row_count(&self, idx: usize) -> u64 {
54        self.as_ref().child_row_count(idx)
55    }
56
57    fn nchildren(&self) -> usize {
58        self.as_ref().nchildren()
59    }
60}
61
62/// An implementation of [`LayoutChildren`] for in-memory owned children.
63/// See also [`ViewLayoutChildren`] for lazily deserialized children from flatbuffers.
64#[derive(Clone)]
65pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
66
67impl OwnedLayoutChildren {
68    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
69        Arc::new(Self(children))
70    }
71}
72
73/// In-memory implementation of [`LayoutChildren`].
74impl LayoutChildren for OwnedLayoutChildren {
75    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
76        Arc::new(self.clone())
77    }
78
79    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
80        if idx >= self.0.len() {
81            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
82        }
83        let child = &self.0[idx];
84        if child.dtype() != dtype {
85            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
86        }
87        Ok(child.clone())
88    }
89
90    fn child_row_count(&self, idx: usize) -> u64 {
91        self.0[idx].row_count()
92    }
93
94    fn nchildren(&self) -> usize {
95        self.0.len()
96    }
97}
98
99#[derive(Clone)]
100pub(crate) struct ViewedLayoutChildren {
101    flatbuffer: FlatBuffer,
102    flatbuffer_loc: usize,
103    array_read_ctx: ReadContext,
104    layout_read_ctx: ReadContext,
105    layouts: LayoutRegistry,
106}
107
108impl ViewedLayoutChildren {
109    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
110    ///
111    /// # Safety
112    ///
113    /// Assumes the flatbuffer is validated and that the `flatbuffer_loc` is the correct offset
114    pub(super) unsafe fn new_unchecked(
115        flatbuffer: FlatBuffer,
116        flatbuffer_loc: usize,
117        array_read_ctx: ReadContext,
118        layout_read_ctx: ReadContext,
119        layouts: LayoutRegistry,
120    ) -> Self {
121        Self {
122            flatbuffer,
123            flatbuffer_loc,
124            array_read_ctx,
125            layout_read_ctx,
126            layouts,
127        }
128    }
129
130    /// Return the flatbuffer layout message.
131    fn flatbuffer(&self) -> fbl::Layout<'_> {
132        // SAFETY: flatbuffer_loc is guaranteed to be a valid offset into the flatbuffer
133        // as it was constructed from a validated flatbuffer in ViewedLayoutChildren::try_new.
134        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
135        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
136    }
137}
138
139impl LayoutChildren for ViewedLayoutChildren {
140    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
141        Arc::new(self.clone())
142    }
143
144    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
145        if idx >= self.nchildren() {
146            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
147        }
148        let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
149
150        let viewed_children = ViewedLayoutChildren {
151            flatbuffer: self.flatbuffer.clone(),
152            flatbuffer_loc: fb_child._tab.loc(),
153            array_read_ctx: self.array_read_ctx.clone(),
154            layout_read_ctx: self.layout_read_ctx.clone(),
155            layouts: self.layouts.clone(),
156        };
157
158        let encoding_id = self
159            .layout_read_ctx
160            .resolve(fb_child.encoding())
161            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
162        let encoding = self.layouts.find(&encoding_id).ok_or_else(|| {
163            vortex_err!("Encoding not found in registry: {}", fb_child.encoding())
164        })?;
165
166        encoding.build(
167            dtype,
168            fb_child.row_count(),
169            fb_child
170                .metadata()
171                .map(|m| m.bytes())
172                .unwrap_or_else(|| &[]),
173            fb_child
174                .segments()
175                .unwrap_or_default()
176                .iter()
177                .map(SegmentId::from)
178                .collect_vec(),
179            &viewed_children,
180            &self.array_read_ctx,
181        )
182    }
183
184    fn child_row_count(&self, idx: usize) -> u64 {
185        // Efficiently get the row count of the child at the given index, without a full
186        // deserialization.
187        self.flatbuffer()
188            .children()
189            .unwrap_or_default()
190            .get(idx)
191            .row_count()
192    }
193
194    fn nchildren(&self) -> usize {
195        self.flatbuffer().children().unwrap_or_default().len()
196    }
197}