Skip to main content

vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use flatbuffers::Follow;
9use itertools::Itertools;
10use vortex_array::ArrayContext;
11use vortex_dtype::DType;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_err;
15use vortex_flatbuffers::FlatBuffer;
16use vortex_flatbuffers::layout as fbl;
17
18use crate::LayoutContext;
19use crate::LayoutRef;
20use crate::segments::SegmentId;
21use crate::session::LayoutRegistry;
22
/// Abstract way of accessing the children of a layout.
///
/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
/// layout trees.
pub trait LayoutChildren: 'static + Send + Sync {
    /// Returns these children as a shared, type-erased [`Arc`] handle.
    fn to_arc(&self) -> Arc<dyn LayoutChildren>;

    /// Returns the child layout at position `idx`; `dtype` is the dtype the caller expects
    /// that child to have.
    ///
    /// # Errors
    ///
    /// The implementations in this file return an error when `idx` is not less than
    /// [`nchildren`](Self::nchildren). They can also fail for implementation-specific reasons,
    /// such as a dtype mismatch or a layout encoding that cannot be resolved.
    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;

    /// Returns the row count of the child at position `idx`.
    ///
    /// This method has no way to report an error, so `idx` should be less than
    /// [`nchildren`](Self::nchildren). The in-memory implementation in this file indexes its
    /// vector directly and panics for an out-of-range `idx`.
    fn child_row_count(&self, idx: usize) -> u64;

    /// Returns the number of children.
    fn nchildren(&self) -> usize;
}
36
37impl Debug for dyn LayoutChildren {
38    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
39        f.debug_struct("LayoutChildren")
40            .field("nchildren", &self.nchildren())
41            .finish()
42    }
43}
44
45impl LayoutChildren for Arc<dyn LayoutChildren> {
46    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
47        self.clone()
48    }
49
50    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
51        self.as_ref().child(idx, dtype)
52    }
53
54    fn child_row_count(&self, idx: usize) -> u64 {
55        self.as_ref().child_row_count(idx)
56    }
57
58    fn nchildren(&self) -> usize {
59        self.as_ref().nchildren()
60    }
61}
62
/// An implementation of [`LayoutChildren`] for in-memory owned children.
/// See also [`ViewedLayoutChildren`] for lazily deserialized children from flatbuffers.
#[derive(Clone)]
pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
67
68impl OwnedLayoutChildren {
69    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
70        Arc::new(Self(children))
71    }
72}
73
74/// In-memory implementation of [`LayoutChildren`].
75impl LayoutChildren for OwnedLayoutChildren {
76    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
77        Arc::new(self.clone())
78    }
79
80    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
81        if idx >= self.0.len() {
82            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
83        }
84        let child = &self.0[idx];
85        if child.dtype() != dtype {
86            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
87        }
88        Ok(child.clone())
89    }
90
91    fn child_row_count(&self, idx: usize) -> u64 {
92        self.0[idx].row_count()
93    }
94
95    fn nchildren(&self) -> usize {
96        self.0.len()
97    }
98}
99
/// An implementation of [`LayoutChildren`] that reads its children from a flatbuffer.
///
/// Child layouts are not stored; each one is built from the flatbuffer when
/// [`LayoutChildren::child`] is called.
#[derive(Clone)]
pub(crate) struct ViewedLayoutChildren {
    /// Buffer holding the serialized layout message.
    flatbuffer: FlatBuffer,
    /// Offset within `flatbuffer` of the layout table whose children this value exposes.
    flatbuffer_loc: usize,
    /// Passed through to the layout encoding whenever a child is built.
    array_ctx: ArrayContext,
    /// Resolves the encoding index stored on a flatbuffer child to an encoding id.
    layout_ctx: LayoutContext,
    /// Registry searched for the layout encoding that matches a resolved id.
    layouts: LayoutRegistry,
}
108
109impl ViewedLayoutChildren {
110    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
111    ///
112    /// # Safety
113    ///
114    /// Assumes the flatbuffer is validated and that the `flatbuffer_loc` is the correct offset
115    pub(super) unsafe fn new_unchecked(
116        flatbuffer: FlatBuffer,
117        flatbuffer_loc: usize,
118        array_ctx: ArrayContext,
119        layout_ctx: LayoutContext,
120        layouts: LayoutRegistry,
121    ) -> Self {
122        Self {
123            flatbuffer,
124            flatbuffer_loc,
125            array_ctx,
126            layout_ctx,
127            layouts,
128        }
129    }
130
131    /// Return the flatbuffer layout message.
132    fn flatbuffer(&self) -> fbl::Layout<'_> {
133        // SAFETY: flatbuffer_loc is guaranteed to be a valid offset into the flatbuffer
134        // as it was constructed from a validated flatbuffer in ViewedLayoutChildren::try_new.
135        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
136        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
137    }
138}
139
140impl LayoutChildren for ViewedLayoutChildren {
141    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
142        Arc::new(self.clone())
143    }
144
145    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
146        if idx >= self.nchildren() {
147            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
148        }
149        let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
150
151        let viewed_children = ViewedLayoutChildren {
152            flatbuffer: self.flatbuffer.clone(),
153            flatbuffer_loc: fb_child._tab.loc(),
154            array_ctx: self.array_ctx.clone(),
155            layout_ctx: self.layout_ctx.clone(),
156            layouts: self.layouts.clone(),
157        };
158
159        let encoding_id = self
160            .layout_ctx
161            .resolve(fb_child.encoding())
162            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
163        let encoding = self.layouts.find(&encoding_id).ok_or_else(|| {
164            vortex_err!("Encoding not found in registry: {}", fb_child.encoding())
165        })?;
166
167        encoding.build(
168            dtype,
169            fb_child.row_count(),
170            fb_child
171                .metadata()
172                .map(|m| m.bytes())
173                .unwrap_or_else(|| &[]),
174            fb_child
175                .segments()
176                .unwrap_or_default()
177                .iter()
178                .map(SegmentId::from)
179                .collect_vec(),
180            &viewed_children,
181            &self.array_ctx,
182        )
183    }
184
185    fn child_row_count(&self, idx: usize) -> u64 {
186        // Efficiently get the row count of the child at the given index, without a full
187        // deserialization.
188        self.flatbuffer()
189            .children()
190            .unwrap_or_default()
191            .get(idx)
192            .row_count()
193    }
194
195    fn nchildren(&self) -> usize {
196        self.flatbuffer().children().unwrap_or_default().len()
197    }
198}