vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::sync::Arc;
8
9use arcref::ArcRef;
10use itertools::Itertools;
11use vortex_array::SerializeMetadata;
12use vortex_dtype::DType;
13use vortex_dtype::FieldName;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_err;
17use vortex_session::VortexSession;
18
19use crate::LayoutEncodingId;
20use crate::LayoutEncodingRef;
21use crate::LayoutReaderRef;
22use crate::VTable;
23use crate::display::DisplayLayoutTree;
24use crate::segments::SegmentId;
25use crate::segments::SegmentSource;
26
/// Identifier of a layout, stored as an [`ArcRef<str>`].
pub type LayoutId = ArcRef<str>;

/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
30
/// Type-erased interface over a layout.
///
/// The trait is sealed through `private::Sealed`; in this file the only implementor is
/// [`LayoutAdapter`], which forwards each method to the corresponding [`VTable`] function.
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as [`Any`]; `as_opt` uses this to downcast to a concrete layout.
    fn as_any(&self) -> &dyn Any;

    /// Converts a shared layout into `Arc<dyn Any + Send + Sync>`; `into` uses this to
    /// downcast while keeping the value behind an [`Arc`].
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns an owned [`LayoutRef`] for this layout.
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Describes how the child at the given index relates to this layout (see
    /// [`LayoutChildType`]), e.g. a row chunk, a field, or an auxiliary child such as
    /// dictionary values or zone maps.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as serialized bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Creates a GPU reader for this layout. Only compiled when the `gpu_unstable` cfg is set.
    ///
    /// NOTE(review): `name` presumably labels the reader and `segment_source` supplies the
    /// layout's segments — confirm against the implementations.
    #[cfg(gpu_unstable)]
    fn new_gpu_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        ctx: Arc<cudarc::driver::CudaContext>,
    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef>;

    /// Creates a [`LayoutReaderRef`] for this layout.
    ///
    /// NOTE(review): `name` presumably labels the reader and `segment_source` supplies the
    /// layout's segments — confirm against the implementations.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
    ) -> VortexResult<LayoutReaderRef>;
}
78
/// Conversion of a value into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`], consuming `self`.
    fn into_layout(self) -> LayoutRef;
}
83
84/// A type that allows us to identify how a layout child relates to its parent.
85#[derive(Debug, Clone, PartialEq, Eq)]
86pub enum LayoutChildType {
87    /// A layout child that retains the same schema and row offset position in the dataset.
88    Transparent(Arc<str>),
89    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
90    /// Contains a human-readable name of the child.
91    Auxiliary(Arc<str>),
92    /// A layout child that represents a row-based chunk of data.
93    /// Contains the chunk index and relative row offset of the child.
94    Chunk((usize, u64)),
95    /// A layout child that represents a single field of data.
96    /// Contains the field name of the child.
97    Field(FieldName),
98}
99
100impl LayoutChildType {
101    /// Returns the name of this child.
102    pub fn name(&self) -> Arc<str> {
103        match self {
104            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
105            LayoutChildType::Auxiliary(name) => name.clone(),
106            LayoutChildType::Transparent(name) => name.clone(),
107            LayoutChildType::Field(name) => name.clone().into(),
108        }
109    }
110
111    /// Returns the relative row offset of this child.
112    /// For auxiliary children, this is `None`.
113    pub fn row_offset(&self) -> Option<u64> {
114        match self {
115            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
116            LayoutChildType::Auxiliary(_) => None,
117            LayoutChildType::Transparent(_) => Some(0),
118            LayoutChildType::Field(_) => Some(0),
119        }
120    }
121}
122
123impl dyn Layout + '_ {
124    /// The ID of the encoding for this layout.
125    pub fn encoding_id(&self) -> LayoutEncodingId {
126        self.encoding().id()
127    }
128
129    /// The children of this layout.
130    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
131        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
132    }
133
134    /// The child types of this layout.
135    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
136        (0..self.nchildren()).map(|i| self.child_type(i))
137    }
138
139    /// The names of the children of this layout.
140    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
141        self.child_types().map(|child| child.name())
142    }
143
144    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
145    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
146        self.child_types().map(|child| child.row_offset())
147    }
148
149    pub fn is<V: VTable>(&self) -> bool {
150        self.as_opt::<V>().is_some()
151    }
152
153    /// Downcast a layout to a specific type.
154    pub fn as_<V: VTable>(&self) -> &V::Layout {
155        self.as_opt::<V>().vortex_expect("Failed to downcast")
156    }
157
158    /// Downcast a layout to a specific type.
159    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
160        self.as_any()
161            .downcast_ref::<LayoutAdapter<V>>()
162            .map(|adapter| &adapter.0)
163    }
164
165    /// Downcast a layout to a specific type.
166    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
167        let layout_adapter = self
168            .as_any_arc()
169            .downcast::<LayoutAdapter<V>>()
170            .map_err(|_| vortex_err!("Invalid layout type"))
171            .vortex_expect("Invalid layout type");
172
173        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
174        // it has the same memory layout as V::Layout. The downcast above ensures we have
175        // the correct type. This transmute is safe because both Arc types point to data
176        // with identical layout and alignment.
177        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
178    }
179
180    /// Depth-first traversal of the layout and its children.
181    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
182        /// A depth-first pre-order iterator over a layout.
183        struct ChildrenIterator {
184            stack: Vec<LayoutRef>,
185        }
186
187        impl Iterator for ChildrenIterator {
188            type Item = VortexResult<LayoutRef>;
189
190            fn next(&mut self) -> Option<Self::Item> {
191                let next = self.stack.pop()?;
192                let Ok(children) = next.children() else {
193                    return Some(Ok(next));
194                };
195                for child in children.into_iter().rev() {
196                    self.stack.push(child);
197                }
198                Some(Ok(next))
199            }
200        }
201
202        ChildrenIterator {
203            stack: vec![self.to_layout()],
204        }
205    }
206
207    /// Display the layout as a tree structure.
208    pub fn display_tree(&self) -> DisplayLayoutTree {
209        DisplayLayoutTree::new(self.to_layout(), false)
210    }
211
212    /// Display the layout as a tree structure with optional verbose metadata.
213    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
214        DisplayLayoutTree::new(self.to_layout(), verbose)
215    }
216}
217
218#[repr(transparent)]
219pub struct LayoutAdapter<V: VTable>(V::Layout);
220
221impl<V: VTable> Debug for LayoutAdapter<V> {
222    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
223        self.0.fmt(f)
224    }
225}
226
227impl<V: VTable> Layout for LayoutAdapter<V> {
228    fn as_any(&self) -> &dyn Any {
229        self
230    }
231
232    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
233        self
234    }
235
236    fn to_layout(&self) -> LayoutRef {
237        Arc::new(LayoutAdapter::<V>(self.0.clone()))
238    }
239
240    fn encoding(&self) -> LayoutEncodingRef {
241        V::encoding(&self.0)
242    }
243
244    fn row_count(&self) -> u64 {
245        V::row_count(&self.0)
246    }
247
248    fn dtype(&self) -> &DType {
249        V::dtype(&self.0)
250    }
251
252    fn nchildren(&self) -> usize {
253        V::nchildren(&self.0)
254    }
255
256    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
257        V::child(&self.0, idx)
258    }
259
260    fn child_type(&self, idx: usize) -> LayoutChildType {
261        V::child_type(&self.0, idx)
262    }
263
264    fn metadata(&self) -> Vec<u8> {
265        V::metadata(&self.0).serialize()
266    }
267
268    fn segment_ids(&self) -> Vec<SegmentId> {
269        V::segment_ids(&self.0)
270    }
271
272    #[cfg(gpu_unstable)]
273    fn new_gpu_reader(
274        &self,
275        name: Arc<str>,
276        segment_source: Arc<dyn SegmentSource>,
277        ctx: Arc<cudarc::driver::CudaContext>,
278    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
279        V::new_gpu_reader(&self.0, name, segment_source, ctx)
280    }
281
282    fn new_reader(
283        &self,
284        name: Arc<str>,
285        segment_source: Arc<dyn SegmentSource>,
286        session: &VortexSession,
287    ) -> VortexResult<LayoutReaderRef> {
288        V::new_reader(&self.0, name, segment_source, session)
289    }
290}
291
292mod private {
293    use super::*;
294
295    pub trait Sealed {}
296
297    impl<V: VTable> Sealed for LayoutAdapter<V> {}
298}
299
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Every variant reports the expected name.
    #[test]
    fn test_layout_child_type_name() {
        let cases: [(LayoutChildType, &str); 4] = [
            (LayoutChildType::Chunk((5, 100)), "[5]"),
            (LayoutChildType::Field(FieldName::from("customer_id")), "customer_id"),
            (LayoutChildType::Auxiliary(Arc::from("zone_map")), "zone_map"),
            (LayoutChildType::Transparent(Arc::from("compressed")), "compressed"),
        ];

        for (child_type, expected) in cases {
            assert_eq!(child_type.name().as_ref(), expected);
        }
    }

    /// Chunks report their own offset, auxiliary children none, everything else zero.
    #[test]
    fn test_layout_child_type_row_offset() {
        let cases: [(LayoutChildType, Option<u64>); 4] = [
            (LayoutChildType::Chunk((0, 42)), Some(42)),
            (LayoutChildType::Field(FieldName::from("field1")), Some(0)),
            (LayoutChildType::Auxiliary(Arc::from("metadata")), None),
            (LayoutChildType::Transparent(Arc::from("wrapper")), Some(0)),
        ];

        for (child_type, expected) in cases {
            assert_eq!(child_type.row_offset(), expected);
        }
    }

    /// Equality compares both the variant and its payload.
    #[test]
    fn test_layout_child_type_equality() {
        // Chunks: equal only when both index and offset agree.
        let chunk = LayoutChildType::Chunk((1, 100));
        assert_eq!(chunk, LayoutChildType::Chunk((1, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((2, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((1, 200)));

        // Fields: equal only when the names agree.
        let field = LayoutChildType::Field(FieldName::from("name"));
        assert_eq!(field, LayoutChildType::Field(FieldName::from("name")));
        assert_ne!(field, LayoutChildType::Field(FieldName::from("age")));

        // Auxiliary children: equal only when the names agree.
        let aux = LayoutChildType::Auxiliary(Arc::from("stats"));
        assert_eq!(aux, LayoutChildType::Auxiliary(Arc::from("stats")));
        assert_ne!(aux, LayoutChildType::Auxiliary(Arc::from("index")));

        // Transparent children: equal only when the names agree.
        let transparent = LayoutChildType::Transparent(Arc::from("enc"));
        assert_eq!(transparent, LayoutChildType::Transparent(Arc::from("enc")));
        assert_ne!(transparent, LayoutChildType::Transparent(Arc::from("dec")));

        // Different variants never compare equal.
        assert_ne!(chunk, field);
        assert_ne!(field, aux);
        assert_ne!(aux, transparent);
    }

    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child_type: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        let (actual_name, actual_offset) = (child_type.name(), child_type.row_offset());
        assert_eq!(actual_name.as_ref(), expected_name);
        assert_eq!(actual_offset, expected_offset);
    }

    /// Chunk names are the bracketed index and chunk offsets are passed through unchanged.
    #[test]
    fn test_chunk_with_different_indices_and_offsets() {
        let payloads: [(usize, u64); 4] = [(0, 0), (1, 100), (2, 200), (100, 10000)];

        for (idx, offset) in payloads {
            let chunk = LayoutChildType::Chunk((idx, offset));
            let name = chunk.name();

            assert!(name.starts_with('['));
            assert!(name.ends_with(']'));
            assert_eq!(name.as_ref(), format!("[{idx}]"));
            assert_eq!(chunk.row_offset(), Some(offset));
        }
    }

    /// Field names are returned verbatim, whatever punctuation they contain.
    #[test]
    fn test_field_names_with_special_characters() {
        let raw_names = [
            "field-with-dashes",
            "field_with_underscores",
            "field.with.dots",
            "field::with::colons",
            "field/with/slashes",
            "field@with#symbols",
        ];

        for raw in raw_names {
            let expected: Arc<str> = Arc::from(raw);
            let field = LayoutChildType::Field(Arc::clone(&expected).into());
            assert_eq!(field.name(), expected);
            assert_eq!(field.row_offset(), Some(0));
        }
    }
}