vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::{Debug, Formatter};
6use std::sync::Arc;
7
8use arcref::ArcRef;
9use itertools::Itertools;
10use vortex_array::SerializeMetadata;
11use vortex_dtype::{DType, FieldName};
12use vortex_error::{VortexExpect, VortexResult, vortex_err};
13
14use crate::segments::{SegmentId, SegmentSource};
15use crate::{LayoutEncodingId, LayoutEncodingRef, LayoutReaderRef, VTable};
16
17pub type LayoutId = ArcRef<str>;
18
19pub type LayoutRef = Arc<dyn Layout>;
20
21pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
22    fn as_any(&self) -> &dyn Any;
23
24    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
25
26    fn to_layout(&self) -> LayoutRef;
27
28    /// Returns the [`crate::LayoutEncoding`] for this layout.
29    fn encoding(&self) -> LayoutEncodingRef;
30
31    /// The number of rows in this layout.
32    fn row_count(&self) -> u64;
33
34    /// The dtype of this layout when projected with the root scope.
35    fn dtype(&self) -> &DType;
36
37    /// The number of children in this layout.
38    fn nchildren(&self) -> usize;
39
40    /// Get the child at the given index.
41    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;
42
43    /// Get the relative row offset of the child at the given index, returning `None` for
44    /// any auxilliary children, e.g. dictionary values, zone maps, etc.
45    fn child_type(&self, idx: usize) -> LayoutChildType;
46
47    /// Get the metadata for this layout.
48    fn metadata(&self) -> Vec<u8>;
49
50    /// Get the segment IDs for this layout.
51    fn segment_ids(&self) -> Vec<SegmentId>;
52
53    fn new_reader(
54        &self,
55        name: Arc<str>,
56        segment_source: Arc<dyn SegmentSource>,
57    ) -> VortexResult<LayoutReaderRef>;
58}
59
60pub trait IntoLayout {
61    /// Converts this type into a [`LayoutRef`].
62    fn into_layout(self) -> LayoutRef;
63}
64
65/// A type that allows us to identify how a layout child relates to its parent.
66#[derive(Debug, Clone, PartialEq, Eq)]
67pub enum LayoutChildType {
68    /// A layout child that retains the same schema and row offset position in the dataset.
69    Transparent(Arc<str>),
70    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
71    /// Contains a human-readable name of the child.
72    Auxiliary(Arc<str>),
73    /// A layout child that represents a row-based chunk of data.
74    /// Contains the chunk index and relative row offset of the child.
75    Chunk((usize, u64)),
76    /// A layout child that represents a single field of data.
77    /// Contains the field name of the child.
78    Field(FieldName),
79    // A layout child that contains a subset of the fields of the parent layout.
80    // Contains a mask over the fields of the parent layout.
81    // TODO(ngates): FieldMask API needs fixing before we enable this. We also don't yet have a
82    //  use-case for this.
83    // Mask(Vec<FieldMask>),
84}
85
86impl LayoutChildType {
87    /// Returns the name of this child.
88    pub fn name(&self) -> Arc<str> {
89        match self {
90            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
91            LayoutChildType::Auxiliary(name) => name.clone(),
92            LayoutChildType::Transparent(name) => name.clone(),
93            LayoutChildType::Field(name) => name.clone(),
94        }
95    }
96
97    /// Returns the relative row offset of this child.
98    /// For auxiliary children, this is `None`.
99    pub fn row_offset(&self) -> Option<u64> {
100        match self {
101            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
102            LayoutChildType::Auxiliary(_) => None,
103            LayoutChildType::Transparent(_) => Some(0),
104            LayoutChildType::Field(_) => Some(0),
105        }
106    }
107}
108
109impl dyn Layout + '_ {
110    /// The ID of the encoding for this layout.
111    pub fn encoding_id(&self) -> LayoutEncodingId {
112        self.encoding().id()
113    }
114
115    /// The children of this layout.
116    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
117        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
118    }
119
120    /// The child types of this layout.
121    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
122        (0..self.nchildren()).map(|i| self.child_type(i))
123    }
124
125    /// The names of the children of this layout.
126    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
127        self.child_types().map(|child| child.name())
128    }
129
130    /// The row offsets of the children of this layout, where `None` indicates an auxilliary child.
131    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
132        self.child_types().map(|child| child.row_offset())
133    }
134
135    pub fn is<V: VTable>(&self) -> bool {
136        self.as_opt::<V>().is_some()
137    }
138
139    /// Downcast a layout to a specific type.
140    pub fn as_<V: VTable>(&self) -> &V::Layout {
141        self.as_opt::<V>().vortex_expect("Failed to downcast")
142    }
143
144    /// Downcast a layout to a specific type.
145    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
146        self.as_any()
147            .downcast_ref::<LayoutAdapter<V>>()
148            .map(|adapter| &adapter.0)
149    }
150
151    /// Downcast a layout to a specific type.
152    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
153        let layout_adapter = self
154            .as_any_arc()
155            .downcast::<LayoutAdapter<V>>()
156            .map_err(|_| vortex_err!("Invalid layout type"))
157            .vortex_expect("Invalid layout type");
158
159        // Now we can perform a cheeky transmute since we know the adapter is transparent.
160        // SAFETY: The adapter is transparent and we know the underlying type is correct.
161        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
162    }
163
164    /// Depth-first traversal of the layout and its children.
165    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
166        /// A depth-first pre-order iterator over a layout.
167        struct ChildrenIterator {
168            stack: Vec<LayoutRef>,
169        }
170
171        impl Iterator for ChildrenIterator {
172            type Item = VortexResult<LayoutRef>;
173
174            fn next(&mut self) -> Option<Self::Item> {
175                let next = self.stack.pop()?;
176                let Ok(children) = next.children() else {
177                    return Some(Ok(next));
178                };
179                for child in children.into_iter().rev() {
180                    self.stack.push(child);
181                }
182                Some(Ok(next))
183            }
184        }
185
186        ChildrenIterator {
187            stack: vec![self.to_layout()],
188        }
189    }
190}
191
192#[repr(transparent)]
193pub struct LayoutAdapter<V: VTable>(V::Layout);
194
195impl<V: VTable> Debug for LayoutAdapter<V> {
196    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
197        self.0.fmt(f)
198    }
199}
200
201impl<V: VTable> Layout for LayoutAdapter<V> {
202    fn as_any(&self) -> &dyn Any {
203        self
204    }
205
206    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
207        self
208    }
209
210    fn to_layout(&self) -> LayoutRef {
211        Arc::new(LayoutAdapter::<V>(self.0.clone()))
212    }
213
214    fn encoding(&self) -> LayoutEncodingRef {
215        V::encoding(&self.0)
216    }
217
218    fn row_count(&self) -> u64 {
219        V::row_count(&self.0)
220    }
221
222    fn dtype(&self) -> &DType {
223        V::dtype(&self.0)
224    }
225
226    fn nchildren(&self) -> usize {
227        V::nchildren(&self.0)
228    }
229
230    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
231        V::child(&self.0, idx)
232    }
233
234    fn child_type(&self, idx: usize) -> LayoutChildType {
235        V::child_type(&self.0, idx)
236    }
237
238    fn metadata(&self) -> Vec<u8> {
239        V::metadata(&self.0).serialize()
240    }
241
242    fn segment_ids(&self) -> Vec<SegmentId> {
243        V::segment_ids(&self.0)
244    }
245
246    fn new_reader(
247        &self,
248        name: Arc<str>,
249        segment_source: Arc<dyn SegmentSource>,
250    ) -> VortexResult<LayoutReaderRef> {
251        V::new_reader(&self.0, name, segment_source)
252    }
253}
254
255mod private {
256    use super::*;
257
258    pub trait Sealed {}
259
260    impl<V: VTable> Sealed for LayoutAdapter<V> {}
261}