vortex_layout/
children.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Debug, Formatter};
5use std::sync::Arc;
6
7use flatbuffers::Follow;
8use itertools::Itertools;
9use vortex_array::ArrayContext;
10use vortex_dtype::DType;
11use vortex_error::{VortexResult, vortex_bail, vortex_err};
12use vortex_flatbuffers::{FlatBuffer, layout as fbl};
13
14use crate::segments::SegmentId;
15use crate::{LayoutContext, LayoutRef};
16
17/// Abstract way of accessing the children of a layout.
18///
19/// This allows us to abstract over the lazy flatbuffer-based layouts, as well as the in-memory
20/// layout trees.
21pub trait LayoutChildren: 'static + Send + Sync {
22    fn to_arc(&self) -> Arc<dyn LayoutChildren>;
23
24    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef>;
25
26    fn child_row_count(&self, idx: usize) -> u64;
27
28    fn nchildren(&self) -> usize;
29}
30
31impl Debug for dyn LayoutChildren {
32    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
33        f.debug_struct("LayoutChildren")
34            .field("nchildren", &self.nchildren())
35            .finish()
36    }
37}
38
39impl LayoutChildren for Arc<dyn LayoutChildren> {
40    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
41        self.clone()
42    }
43
44    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
45        self.as_ref().child(idx, dtype)
46    }
47
48    fn child_row_count(&self, idx: usize) -> u64 {
49        self.as_ref().child_row_count(idx)
50    }
51
52    fn nchildren(&self) -> usize {
53        self.as_ref().nchildren()
54    }
55}
56
57/// An implementation of [`LayoutChildren`] for in-memory owned children.
58/// See also [`ViewLayoutChildren`] for lazily deserialized children from flatbuffers.
59#[derive(Clone)]
60pub(crate) struct OwnedLayoutChildren(Vec<LayoutRef>);
61
62impl OwnedLayoutChildren {
63    pub fn layout_children(children: Vec<LayoutRef>) -> Arc<dyn LayoutChildren> {
64        Arc::new(Self(children))
65    }
66}
67
68/// In-memory implementation of [`LayoutChildren`].
69impl LayoutChildren for OwnedLayoutChildren {
70    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
71        Arc::new(self.clone())
72    }
73
74    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
75        if idx >= self.0.len() {
76            vortex_bail!("Child index out of bounds: {} of {}", idx, self.0.len());
77        }
78        let child = &self.0[idx];
79        if child.dtype() != dtype {
80            vortex_bail!("Child dtype mismatch: {} != {}", child.dtype(), dtype);
81        }
82        Ok(child.clone())
83    }
84
85    fn child_row_count(&self, idx: usize) -> u64 {
86        self.0[idx].row_count()
87    }
88
89    fn nchildren(&self) -> usize {
90        self.0.len()
91    }
92}
93
/// An implementation of [`LayoutChildren`] that reads its children out of a serialized
/// layout flatbuffer, building a child layout only when it is asked for.
///
/// See also [`OwnedLayoutChildren`] for in-memory owned children.
#[derive(Clone)]
pub(crate) struct ViewedLayoutChildren {
    /// The flatbuffer that holds the serialized layout message.
    flatbuffer: FlatBuffer,
    /// Location inside `flatbuffer` at which the layout message is followed.
    flatbuffer_loc: usize,
    /// Array context handed to the layout encoding whenever a child is built.
    array_ctx: ArrayContext,
    /// Layout context used to look up the encoding of each child.
    layout_ctx: LayoutContext,
}
101
impl ViewedLayoutChildren {
    /// Create a new [`ViewedLayoutChildren`] from the given parameters.
    ///
    /// The arguments are stored as given; nothing is validated here.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `flatbuffer` has been validated and that `flatbuffer_loc`
    /// is the location of a layout message inside it. That location is later followed
    /// without any further checks.
    pub(super) unsafe fn new_unchecked(
        flatbuffer: FlatBuffer,
        flatbuffer_loc: usize,
        array_ctx: ArrayContext,
        layout_ctx: LayoutContext,
    ) -> Self {
        Self {
            flatbuffer,
            flatbuffer_loc,
            array_ctx,
            layout_ctx,
        }
    }

    /// Return the flatbuffer layout message.
    fn flatbuffer(&self) -> fbl::Layout<'_> {
        // SAFETY: `flatbuffer_loc` must be the location of a layout message inside
        // `self.flatbuffer`. Values built in this file come either from `new_unchecked`, whose
        // caller promises exactly that, or from `child`, which takes the location from a child
        // table read out of this same buffer.
        // The lifetime of the returned Layout is tied to self, ensuring the buffer remains valid.
        unsafe { fbl::Layout::follow(self.flatbuffer.as_ref(), self.flatbuffer_loc) }
    }
}
130
131impl LayoutChildren for ViewedLayoutChildren {
132    fn to_arc(&self) -> Arc<dyn LayoutChildren> {
133        Arc::new(self.clone())
134    }
135
136    fn child(&self, idx: usize, dtype: &DType) -> VortexResult<LayoutRef> {
137        if idx >= self.nchildren() {
138            vortex_bail!("Child index out of bounds: {} of {}", idx, self.nchildren());
139        }
140        let fb_child = self.flatbuffer().children().unwrap_or_default().get(idx);
141
142        let viewed_children = ViewedLayoutChildren {
143            flatbuffer: self.flatbuffer.clone(),
144            flatbuffer_loc: fb_child._tab.loc(),
145            array_ctx: self.array_ctx.clone(),
146            layout_ctx: self.layout_ctx.clone(),
147        };
148        let encoding = self
149            .layout_ctx
150            .lookup_encoding(fb_child.encoding())
151            .ok_or_else(|| vortex_err!("Encoding not found: {}", fb_child.encoding()))?;
152
153        encoding.build(
154            dtype,
155            fb_child.row_count(),
156            fb_child
157                .metadata()
158                .map(|m| m.bytes())
159                .unwrap_or_else(|| &[]),
160            fb_child
161                .segments()
162                .unwrap_or_default()
163                .iter()
164                .map(SegmentId::from)
165                .collect_vec(),
166            &viewed_children,
167            self.array_ctx.clone(),
168        )
169    }
170
171    fn child_row_count(&self, idx: usize) -> u64 {
172        // Efficiently get the row count of the child at the given index, without a full
173        // deserialization.
174        self.flatbuffer()
175            .children()
176            .unwrap_or_default()
177            .get(idx)
178            .row_count()
179    }
180
181    fn nchildren(&self) -> usize {
182        self.flatbuffer().children().unwrap_or_default().len()
183    }
184}