vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::{Debug, Formatter};
6use std::sync::Arc;
7
8use arcref::ArcRef;
9use itertools::Itertools;
10use vortex_array::SerializeMetadata;
11use vortex_dtype::{DType, FieldName};
12use vortex_error::{VortexExpect, VortexResult, vortex_err};
13
14use crate::segments::{SegmentId, SegmentSource};
15use crate::{LayoutEncodingId, LayoutEncodingRef, LayoutReaderRef, VTable};
16
/// Identifier type for a layout: a string held in an [`ArcRef`].
pub type LayoutId = ArcRef<str>;
18
/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
20
/// The dynamically-typed interface shared by all layouts, normally used through [`LayoutRef`].
///
/// The trait is sealed via the `private::Sealed` supertrait; within this file the only
/// implementation is the blanket one for [`LayoutAdapter`].
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as `&dyn Any` so that it can be downcast by reference.
    fn as_any(&self) -> &dyn Any;

    /// Returns this layout as `Arc<dyn Any + Send + Sync>` so that a shared handle can be
    /// downcast without cloning the layout.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns an owned [`LayoutRef`] for this layout.
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Get the [`LayoutChildType`] of the child at the given index, describing how that child
    /// relates to this layout. The child's name and relative row offset can be derived from it
    /// via [`LayoutChildType::name`] and [`LayoutChildType::row_offset`]; the latter is `None`
    /// for auxiliary children, e.g. dictionary values, zone maps, etc.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout, serialized to bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Construct a [`LayoutReaderRef`] for this layout from the given `name` and the
    /// `segment_source` to read from.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
    ) -> VortexResult<LayoutReaderRef>;
}
59
/// Conversion of an owned value into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`], consuming `self`.
    fn into_layout(self) -> LayoutRef;
}
64
/// A type that allows us to identify how a layout child relates to its parent.
///
/// See [`LayoutChildType::name`] and [`LayoutChildType::row_offset`] for the name and relative
/// row offset derived from each variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains the name of the child.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child.
    Field(FieldName),
    // A layout child that contains a subset of the fields of the parent layout.
    // Contains a mask over the fields of the parent layout.
    // TODO(ngates): FieldMask API needs fixing before we enable this. We also don't yet have a
    //  use-case for this.
    // Mask(Vec<FieldMask>),
}
85
86impl LayoutChildType {
87    /// Returns the name of this child.
88    pub fn name(&self) -> Arc<str> {
89        match self {
90            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
91            LayoutChildType::Auxiliary(name) => name.clone(),
92            LayoutChildType::Transparent(name) => name.clone(),
93            LayoutChildType::Field(name) => name.clone(),
94        }
95    }
96
97    /// Returns the relative row offset of this child.
98    /// For auxiliary children, this is `None`.
99    pub fn row_offset(&self) -> Option<u64> {
100        match self {
101            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
102            LayoutChildType::Auxiliary(_) => None,
103            LayoutChildType::Transparent(_) => Some(0),
104            LayoutChildType::Field(_) => Some(0),
105        }
106    }
107}
108
109impl dyn Layout + '_ {
110    /// The ID of the encoding for this layout.
111    pub fn encoding_id(&self) -> LayoutEncodingId {
112        self.encoding().id()
113    }
114
115    /// The children of this layout.
116    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
117        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
118    }
119
120    /// The child types of this layout.
121    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
122        (0..self.nchildren()).map(|i| self.child_type(i))
123    }
124
125    /// The names of the children of this layout.
126    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
127        self.child_types().map(|child| child.name())
128    }
129
130    /// The row offsets of the children of this layout, where `None` indicates an auxilliary child.
131    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
132        self.child_types().map(|child| child.row_offset())
133    }
134
135    pub fn is<V: VTable>(&self) -> bool {
136        self.as_opt::<V>().is_some()
137    }
138
139    /// Downcast a layout to a specific type.
140    pub fn as_<V: VTable>(&self) -> &V::Layout {
141        self.as_opt::<V>().vortex_expect("Failed to downcast")
142    }
143
144    /// Downcast a layout to a specific type.
145    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
146        self.as_any()
147            .downcast_ref::<LayoutAdapter<V>>()
148            .map(|adapter| &adapter.0)
149    }
150
151    /// Downcast a layout to a specific type.
152    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
153        let layout_adapter = self
154            .as_any_arc()
155            .downcast::<LayoutAdapter<V>>()
156            .map_err(|_| vortex_err!("Invalid layout type"))
157            .vortex_expect("Invalid layout type");
158
159        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
160        // it has the same memory layout as V::Layout. The downcast above ensures we have
161        // the correct type. This transmute is safe because both Arc types point to data
162        // with identical layout and alignment.
163        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
164    }
165
166    /// Depth-first traversal of the layout and its children.
167    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
168        /// A depth-first pre-order iterator over a layout.
169        struct ChildrenIterator {
170            stack: Vec<LayoutRef>,
171        }
172
173        impl Iterator for ChildrenIterator {
174            type Item = VortexResult<LayoutRef>;
175
176            fn next(&mut self) -> Option<Self::Item> {
177                let next = self.stack.pop()?;
178                let Ok(children) = next.children() else {
179                    return Some(Ok(next));
180                };
181                for child in children.into_iter().rev() {
182                    self.stack.push(child);
183                }
184                Some(Ok(next))
185            }
186        }
187
188        ChildrenIterator {
189            stack: vec![self.to_layout()],
190        }
191    }
192}
193
/// Wrapper around a [`VTable`]'s concrete `V::Layout` that implements [`Layout`] by delegating
/// to the functions of `V`.
///
/// `#[repr(transparent)]` is load-bearing: the `into` downcast on `dyn Layout` reinterprets an
/// `Arc<LayoutAdapter<V>>` as an `Arc<V::Layout>` and relies on the two types having the same
/// memory layout.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
196
197impl<V: VTable> Debug for LayoutAdapter<V> {
198    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
199        self.0.fmt(f)
200    }
201}
202
203impl<V: VTable> Layout for LayoutAdapter<V> {
204    fn as_any(&self) -> &dyn Any {
205        self
206    }
207
208    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
209        self
210    }
211
212    fn to_layout(&self) -> LayoutRef {
213        Arc::new(LayoutAdapter::<V>(self.0.clone()))
214    }
215
216    fn encoding(&self) -> LayoutEncodingRef {
217        V::encoding(&self.0)
218    }
219
220    fn row_count(&self) -> u64 {
221        V::row_count(&self.0)
222    }
223
224    fn dtype(&self) -> &DType {
225        V::dtype(&self.0)
226    }
227
228    fn nchildren(&self) -> usize {
229        V::nchildren(&self.0)
230    }
231
232    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
233        V::child(&self.0, idx)
234    }
235
236    fn child_type(&self, idx: usize) -> LayoutChildType {
237        V::child_type(&self.0, idx)
238    }
239
240    fn metadata(&self) -> Vec<u8> {
241        V::metadata(&self.0).serialize()
242    }
243
244    fn segment_ids(&self) -> Vec<SegmentId> {
245        V::segment_ids(&self.0)
246    }
247
248    fn new_reader(
249        &self,
250        name: Arc<str>,
251        segment_source: Arc<dyn SegmentSource>,
252    ) -> VortexResult<LayoutReaderRef> {
253        V::new_reader(&self.0, name, segment_source)
254    }
255}
256
/// Private module holding the [`Sealed`] supertrait of [`Layout`].
mod private {
    use super::*;

    /// Marker supertrait required by [`Layout`]. Because the enclosing module is private, the
    /// trait cannot be named from outside the parent module, which prevents [`Layout`] from
    /// being implemented elsewhere.
    pub trait Sealed {}

    // The only `Sealed` implementation in this file: every `LayoutAdapter` may implement `Layout`.
    impl<V: VTable> Sealed for LayoutAdapter<V> {}
}
264
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Each variant reports the expected child name.
    #[test]
    fn test_layout_child_type_name() {
        let cases = [
            (LayoutChildType::Chunk((5, 100)), "[5]"),
            (
                LayoutChildType::Field(Arc::from("customer_id")),
                "customer_id",
            ),
            (LayoutChildType::Auxiliary(Arc::from("zone_map")), "zone_map"),
            (
                LayoutChildType::Transparent(Arc::from("compressed")),
                "compressed",
            ),
        ];

        for (child, expected) in cases {
            assert_eq!(&*child.name(), expected);
        }
    }

    /// Chunks report their own offset, auxiliary children report none, everything else is 0.
    #[test]
    fn test_layout_child_type_row_offset() {
        let cases = [
            (LayoutChildType::Chunk((0, 42)), Some(42)),
            (LayoutChildType::Field(Arc::from("field1")), Some(0)),
            (LayoutChildType::Auxiliary(Arc::from("metadata")), None),
            (LayoutChildType::Transparent(Arc::from("wrapper")), Some(0)),
        ];

        for (child, expected) in cases {
            assert_eq!(child.row_offset(), expected);
        }
    }

    /// Equality compares both the variant and its payload.
    #[test]
    fn test_layout_child_type_equality() {
        // Chunks: equal only when both index and offset match.
        let chunk = LayoutChildType::Chunk((1, 100));
        assert_eq!(chunk, LayoutChildType::Chunk((1, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((2, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((1, 200)));

        // Fields: equal only when the names match.
        let field = LayoutChildType::Field(Arc::from("name"));
        assert_eq!(field, LayoutChildType::Field(Arc::from("name")));
        assert_ne!(field, LayoutChildType::Field(Arc::from("age")));

        // Auxiliary children: equal only when the names match.
        let aux = LayoutChildType::Auxiliary(Arc::from("stats"));
        assert_eq!(aux, LayoutChildType::Auxiliary(Arc::from("stats")));
        assert_ne!(aux, LayoutChildType::Auxiliary(Arc::from("index")));

        // Transparent children: equal only when the names match.
        let transparent = LayoutChildType::Transparent(Arc::from("enc"));
        assert_eq!(transparent, LayoutChildType::Transparent(Arc::from("enc")));
        assert_ne!(transparent, LayoutChildType::Transparent(Arc::from("dec")));

        // Different variants never compare equal.
        assert_ne!(chunk, field);
        assert_ne!(field, aux);
        assert_ne!(aux, transparent);
    }

    /// Name and row offset checked together across a table of inputs.
    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(Arc::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(Arc::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child_type: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        let actual_name = child_type.name();
        let actual_offset = child_type.row_offset();

        assert_eq!(&*actual_name, expected_name);
        assert_eq!(actual_offset, expected_offset);
    }

    /// Chunk names are always the bracketed index, and the offset round-trips.
    #[test]
    fn test_chunk_with_different_indices_and_offsets() {
        let specs: [(usize, u64); 4] = [(0, 0), (1, 100), (2, 200), (100, 10000)];

        for (idx, offset) in specs {
            let chunk = LayoutChildType::Chunk((idx, offset));
            let name = chunk.name();

            assert!(name.starts_with('['));
            assert!(name.ends_with(']'));
            assert_eq!(&*name, format!("[{idx}]").as_str());
            assert_eq!(chunk.row_offset(), Some(offset));
        }
    }

    /// Field names are passed through verbatim, whatever characters they contain.
    #[test]
    fn test_field_names_with_special_characters() {
        let raw_names = [
            "field-with-dashes",
            "field_with_underscores",
            "field.with.dots",
            "field::with::colons",
            "field/with/slashes",
            "field@with#symbols",
        ];

        for raw in raw_names {
            let field_name: Arc<str> = Arc::from(raw);
            let field = LayoutChildType::Field(field_name.clone());

            assert_eq!(field.name(), field_name);
            assert_eq!(field.row_offset(), Some(0));
        }
    }
}
409}