vortex_layout/
layout.rs

1use std::any::Any;
2use std::collections::BTreeSet;
3use std::fmt::{Debug, Formatter};
4use std::sync::Arc;
5
6use arcref::ArcRef;
7use itertools::Itertools;
8use vortex_array::{ArrayContext, SerializeMetadata};
9use vortex_dtype::{DType, FieldMask, FieldName};
10use vortex_error::{VortexExpect, VortexResult, vortex_err};
11
12use crate::segments::{SegmentId, SegmentSource};
13use crate::{LayoutEncodingId, LayoutEncodingRef, LayoutReaderRef, VTable};
14
15pub type LayoutId = ArcRef<str>;
16
17pub type LayoutRef = Arc<dyn Layout>;
18
/// The object-safe interface shared by every layout.
///
/// The trait is sealed through `private::Sealed`; in this file the only implementor is
/// [`LayoutAdapter`], which forwards each method to the corresponding [`VTable`] function.
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as a [`Any`] reference so that it can be downcast by reference.
    fn as_any(&self) -> &dyn Any;

    /// Converts this shared layout into an `Arc<dyn Any + Send + Sync>` so that it can be
    /// downcast by value.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns this layout as an owned [`LayoutRef`].
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Get the [`LayoutChildType`] of the child at the given index, describing how that child
    /// relates to this layout (transparent, auxiliary, chunk or field).
    ///
    /// The child's name and relative row offset can be derived from the returned value via
    /// [`LayoutChildType::name`] and [`LayoutChildType::row_offset`].
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Registers split positions for this layout into `splits`.
    ///
    /// NOTE(review): presumably `row_offset` is the absolute row position of this layout and
    /// `field_mask` restricts which fields contribute splits; confirm against the
    /// implementations.
    fn register_splits(
        &self,
        field_mask: &[FieldMask],
        row_offset: u64,
        splits: &mut BTreeSet<u64>,
    ) -> VortexResult<()>;

    /// Creates a new [`LayoutReaderRef`] for this layout from the given segment source and
    /// array context.
    ///
    /// NOTE(review): `name` looks like a human-readable label for the reader; confirm against
    /// the implementations.
    fn new_reader(
        &self,
        name: &Arc<str>,
        segment_source: &Arc<dyn SegmentSource>,
        ctx: &ArrayContext,
    ) -> VortexResult<LayoutReaderRef>;
}
65
/// A by-value conversion into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`], consuming `self`.
    fn into_layout(self) -> LayoutRef;
}
70
/// A type that allows us to identify how a layout child relates to its parent.
///
/// Use [`LayoutChildType::name`] for the child's name and [`LayoutChildType::row_offset`] for
/// its row offset relative to the parent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains a human-readable name of the child.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child, in that order.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child.
    Field(FieldName),
    // Disabled variant, kept as a design note:
    // A layout child that contains a subset of the fields of the parent layout.
    // Contains a mask over the fields of the parent layout.
    // TODO(ngates): FieldMask API needs fixing before we enable this. We also don't yet have a
    //  use-case for this.
    // Mask(Vec<FieldMask>),
}
91
92impl LayoutChildType {
93    /// Returns the name of this child.
94    pub fn name(&self) -> Arc<str> {
95        match self {
96            LayoutChildType::Chunk((idx, _offset)) => format!("[{}]", idx).into(),
97            LayoutChildType::Auxiliary(name) => name.clone(),
98            LayoutChildType::Transparent(name) => name.clone(),
99            LayoutChildType::Field(name) => name.clone(),
100        }
101    }
102
103    /// Returns the relative row offset of this child.
104    /// For auxiliary children, this is `None`.
105    pub fn row_offset(&self) -> Option<u64> {
106        match self {
107            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
108            LayoutChildType::Auxiliary(_) => None,
109            LayoutChildType::Transparent(_) => Some(0),
110            LayoutChildType::Field(_) => Some(0),
111        }
112    }
113}
114
115impl dyn Layout + '_ {
116    /// The ID of the encoding for this layout.
117    pub fn encoding_id(&self) -> LayoutEncodingId {
118        self.encoding().id()
119    }
120
121    /// The children of this layout.
122    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
123        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
124    }
125
126    /// The child types of this layout.
127    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
128        (0..self.nchildren()).map(|i| self.child_type(i))
129    }
130
131    /// The names of the children of this layout.
132    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
133        self.child_types().map(|child| child.name())
134    }
135
136    /// The row offsets of the children of this layout, where `None` indicates an auxilliary child.
137    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
138        self.child_types().map(|child| child.row_offset())
139    }
140
141    pub fn is<V: VTable>(&self) -> bool {
142        self.as_opt::<V>().is_some()
143    }
144
145    /// Downcast a layout to a specific type.
146    pub fn as_<V: VTable>(&self) -> &V::Layout {
147        self.as_opt::<V>().vortex_expect("Failed to downcast")
148    }
149
150    /// Downcast a layout to a specific type.
151    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
152        self.as_any()
153            .downcast_ref::<LayoutAdapter<V>>()
154            .map(|adapter| &adapter.0)
155    }
156
157    /// Downcast a layout to a specific type.
158    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
159        let layout_adapter = self
160            .as_any_arc()
161            .downcast::<LayoutAdapter<V>>()
162            .map_err(|_| vortex_err!("Invalid layout type"))
163            .vortex_expect("Invalid layout type");
164
165        // Now we can perform a cheeky transmute since we know the adapter is transparent.
166        // SAFETY: The adapter is transparent and we know the underlying type is correct.
167        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
168    }
169
170    /// Depth-first traversal of the layout and its children.
171    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
172        /// A depth-first pre-order iterator over a layout.
173        struct ChildrenIterator {
174            stack: Vec<LayoutRef>,
175        }
176
177        impl Iterator for ChildrenIterator {
178            type Item = VortexResult<LayoutRef>;
179
180            fn next(&mut self) -> Option<Self::Item> {
181                let next = self.stack.pop()?;
182                let Ok(children) = next.children() else {
183                    return Some(Ok(next));
184                };
185                for child in children.into_iter().rev() {
186                    self.stack.push(child);
187                }
188                Some(Ok(next))
189            }
190        }
191
192        ChildrenIterator {
193            stack: vec![self.to_layout()],
194        }
195    }
196}
197
/// Wraps a `V::Layout` so that it can be used as a [`Layout`] trait object.
///
/// `#[repr(transparent)]` gives the adapter the same memory layout as the wrapped `V::Layout`.
/// The by-value downcast on `dyn Layout` relies on this to turn an `Arc<LayoutAdapter<V>>`
/// into an `Arc<V::Layout>`, so the attribute must not be removed.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
200
201impl<V: VTable> Debug for LayoutAdapter<V> {
202    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
203        self.0.fmt(f)
204    }
205}
206
207impl<V: VTable> Layout for LayoutAdapter<V> {
208    fn as_any(&self) -> &dyn Any {
209        self
210    }
211
212    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
213        self
214    }
215
216    fn to_layout(&self) -> LayoutRef {
217        Arc::new(LayoutAdapter::<V>(self.0.clone()))
218    }
219
220    fn encoding(&self) -> LayoutEncodingRef {
221        V::encoding(&self.0)
222    }
223
224    fn row_count(&self) -> u64 {
225        V::row_count(&self.0)
226    }
227
228    fn dtype(&self) -> &DType {
229        V::dtype(&self.0)
230    }
231
232    fn nchildren(&self) -> usize {
233        V::nchildren(&self.0)
234    }
235
236    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
237        V::child(&self.0, idx)
238    }
239
240    fn child_type(&self, idx: usize) -> LayoutChildType {
241        V::child_type(&self.0, idx)
242    }
243
244    fn metadata(&self) -> Vec<u8> {
245        V::metadata(&self.0).serialize()
246    }
247
248    fn segment_ids(&self) -> Vec<SegmentId> {
249        V::segment_ids(&self.0)
250    }
251
252    fn register_splits(
253        &self,
254        field_mask: &[FieldMask],
255        row_offset: u64,
256        splits: &mut BTreeSet<u64>,
257    ) -> VortexResult<()> {
258        V::register_splits(&self.0, field_mask, row_offset, splits)
259    }
260
261    fn new_reader(
262        &self,
263        name: &Arc<str>,
264        segment_source: &Arc<dyn SegmentSource>,
265        ctx: &ArrayContext,
266    ) -> VortexResult<LayoutReaderRef> {
267        V::new_reader(&self.0, name, segment_source, ctx)
268    }
269}
270
271mod private {
272    use super::*;
273
274    pub trait Sealed {}
275
276    impl<V: VTable> Sealed for LayoutAdapter<V> {}
277}