vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::{Debug, Formatter};
6use std::sync::Arc;
7
8use arcref::ArcRef;
9use itertools::Itertools;
10use vortex_array::SerializeMetadata;
11use vortex_dtype::{DType, FieldName};
12use vortex_error::{VortexExpect, VortexResult, vortex_err};
13
14use crate::display::DisplayLayoutTree;
15use crate::segments::{SegmentId, SegmentSource};
16use crate::{LayoutEncodingId, LayoutEncodingRef, LayoutReaderRef, VTable};
17
/// Identifier type for layouts: an [`ArcRef`] over a `str`.
///
/// NOTE(review): this alias is not referenced in the visible part of this file; presumably it
/// names/identifies a layout — confirm against its users.
pub type LayoutId = ArcRef<str>;
19
/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
21
/// The type-erased interface of a layout node, used through [`LayoutRef`].
///
/// The trait is sealed by its `private::Sealed` supertrait; in this file the only implementor is
/// [`LayoutAdapter`], which forwards every method to the corresponding [`VTable`] function.
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as a `&dyn Any`, so that it can be downcast to its concrete type.
    fn as_any(&self) -> &dyn Any;

    /// Converts an `Arc` of this layout into an `Arc<dyn Any + Send + Sync>`, so that the
    /// shared pointer itself can be downcast to its concrete type.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns this layout as an owned [`LayoutRef`].
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Get the [`LayoutChildType`] of the child at the given index, i.e. how that child relates
    /// to this layout (transparent, auxiliary, chunk or field).
    ///
    /// The child's name and relative row offset can be derived from the returned value; the row
    /// offset is `None` for any auxiliary children, e.g. dictionary values, zone maps, etc.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Creates a GPU reader for this layout from the given name, segment source and CUDA
    /// context.
    ///
    /// Only compiled when the `gpu_unstable` cfg is set.
    #[cfg(gpu_unstable)]
    fn new_gpu_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        ctx: Arc<cudarc::driver::CudaContext>,
    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef>;

    /// Creates a reader for this layout from the given name and segment source.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
    ) -> VortexResult<LayoutReaderRef>;
}
68
/// Conversion of a value into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`], consuming `self`.
    fn into_layout(self) -> LayoutRef;
}
73
/// A type that allows us to identify how a layout child relates to its parent.
///
/// The variant determines both the child's name ([`LayoutChildType::name`]) and its row offset
/// relative to the parent ([`LayoutChildType::row_offset`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains the name of the child; its relative row offset is always `0`.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child. Auxiliary children have no row offset.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child.
    /// The chunk's name is its index in square brackets, e.g. `[5]`.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child; its relative row offset is always `0`.
    Field(FieldName),
}
89
90impl LayoutChildType {
91    /// Returns the name of this child.
92    pub fn name(&self) -> Arc<str> {
93        match self {
94            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
95            LayoutChildType::Auxiliary(name) => name.clone(),
96            LayoutChildType::Transparent(name) => name.clone(),
97            LayoutChildType::Field(name) => name.clone().into(),
98        }
99    }
100
101    /// Returns the relative row offset of this child.
102    /// For auxiliary children, this is `None`.
103    pub fn row_offset(&self) -> Option<u64> {
104        match self {
105            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
106            LayoutChildType::Auxiliary(_) => None,
107            LayoutChildType::Transparent(_) => Some(0),
108            LayoutChildType::Field(_) => Some(0),
109        }
110    }
111}
112
113impl dyn Layout + '_ {
114    /// The ID of the encoding for this layout.
115    pub fn encoding_id(&self) -> LayoutEncodingId {
116        self.encoding().id()
117    }
118
119    /// The children of this layout.
120    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
121        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
122    }
123
124    /// The child types of this layout.
125    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
126        (0..self.nchildren()).map(|i| self.child_type(i))
127    }
128
129    /// The names of the children of this layout.
130    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
131        self.child_types().map(|child| child.name())
132    }
133
134    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
135    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
136        self.child_types().map(|child| child.row_offset())
137    }
138
139    pub fn is<V: VTable>(&self) -> bool {
140        self.as_opt::<V>().is_some()
141    }
142
143    /// Downcast a layout to a specific type.
144    pub fn as_<V: VTable>(&self) -> &V::Layout {
145        self.as_opt::<V>().vortex_expect("Failed to downcast")
146    }
147
148    /// Downcast a layout to a specific type.
149    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
150        self.as_any()
151            .downcast_ref::<LayoutAdapter<V>>()
152            .map(|adapter| &adapter.0)
153    }
154
155    /// Downcast a layout to a specific type.
156    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
157        let layout_adapter = self
158            .as_any_arc()
159            .downcast::<LayoutAdapter<V>>()
160            .map_err(|_| vortex_err!("Invalid layout type"))
161            .vortex_expect("Invalid layout type");
162
163        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
164        // it has the same memory layout as V::Layout. The downcast above ensures we have
165        // the correct type. This transmute is safe because both Arc types point to data
166        // with identical layout and alignment.
167        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
168    }
169
170    /// Depth-first traversal of the layout and its children.
171    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
172        /// A depth-first pre-order iterator over a layout.
173        struct ChildrenIterator {
174            stack: Vec<LayoutRef>,
175        }
176
177        impl Iterator for ChildrenIterator {
178            type Item = VortexResult<LayoutRef>;
179
180            fn next(&mut self) -> Option<Self::Item> {
181                let next = self.stack.pop()?;
182                let Ok(children) = next.children() else {
183                    return Some(Ok(next));
184                };
185                for child in children.into_iter().rev() {
186                    self.stack.push(child);
187                }
188                Some(Ok(next))
189            }
190        }
191
192        ChildrenIterator {
193            stack: vec![self.to_layout()],
194        }
195    }
196
197    /// Display the layout as a tree structure.
198    pub fn display_tree(&self) -> DisplayLayoutTree {
199        DisplayLayoutTree::new(self.to_layout(), false)
200    }
201
202    /// Display the layout as a tree structure with optional verbose metadata.
203    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
204        DisplayLayoutTree::new(self.to_layout(), verbose)
205    }
206}
207
/// Wrapper around a concrete `V::Layout` that implements the type-erased [`Layout`] trait by
/// forwarding to the functions of the vtable `V`.
///
/// The `#[repr(transparent)]` attribute is load-bearing: the `Arc` downcast in
/// `<dyn Layout>::into` relies on this type having exactly the same memory layout as
/// `V::Layout`.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
210
211impl<V: VTable> Debug for LayoutAdapter<V> {
212    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
213        self.0.fmt(f)
214    }
215}
216
/// Implements the type-erased [`Layout`] interface for the adapter. Apart from the `Any`
/// conversions and `to_layout`, every method forwards to the matching function of the vtable
/// `V`, passing the wrapped `V::Layout`.
impl<V: VTable> Layout for LayoutAdapter<V> {
    // The adapter itself (not the wrapped layout) is exposed as `Any`; downcasts therefore
    // target `LayoutAdapter<V>`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    // Same as `as_any`, but for a shared pointer to the adapter.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }

    // Clones the wrapped layout into a new adapter behind a fresh `Arc`.
    fn to_layout(&self) -> LayoutRef {
        Arc::new(LayoutAdapter::<V>(self.0.clone()))
    }

    fn encoding(&self) -> LayoutEncodingRef {
        V::encoding(&self.0)
    }

    fn row_count(&self) -> u64 {
        V::row_count(&self.0)
    }

    fn dtype(&self) -> &DType {
        V::dtype(&self.0)
    }

    fn nchildren(&self) -> usize {
        V::nchildren(&self.0)
    }

    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
        V::child(&self.0, idx)
    }

    fn child_type(&self, idx: usize) -> LayoutChildType {
        V::child_type(&self.0, idx)
    }

    // The vtable returns typed metadata; it is serialized to bytes here.
    fn metadata(&self) -> Vec<u8> {
        V::metadata(&self.0).serialize()
    }

    fn segment_ids(&self) -> Vec<SegmentId> {
        V::segment_ids(&self.0)
    }

    // Only compiled when the `gpu_unstable` cfg is set.
    #[cfg(gpu_unstable)]
    fn new_gpu_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        ctx: Arc<cudarc::driver::CudaContext>,
    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
        V::new_gpu_reader(&self.0, name, segment_source, ctx)
    }

    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
    ) -> VortexResult<LayoutReaderRef> {
        V::new_reader(&self.0, name, segment_source)
    }
}
280
/// Private module that seals the [`Layout`] trait.
mod private {
    use super::*;

    /// Marker supertrait of [`Layout`]. Because this module is private, the trait cannot be
    /// named from outside the enclosing module, so only the implementations listed here can
    /// satisfy the `Layout: Sealed` bound.
    pub trait Sealed {}

    // `LayoutAdapter` is the only type permitted to implement `Layout`.
    impl<V: VTable> Sealed for LayoutAdapter<V> {}
}
288
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Each variant reports the expected name.
    #[test]
    fn test_layout_child_type_name() {
        let expectations = [
            (LayoutChildType::Chunk((5, 100)), "[5]"),
            (
                LayoutChildType::Field(FieldName::from("customer_id")),
                "customer_id",
            ),
            (LayoutChildType::Auxiliary(Arc::from("zone_map")), "zone_map"),
            (
                LayoutChildType::Transparent(Arc::from("compressed")),
                "compressed",
            ),
        ];

        for (child, expected_name) in &expectations {
            assert_eq!(child.name().as_ref(), *expected_name);
        }
    }

    /// Chunks report their own offset, auxiliary children none, everything else zero.
    #[test]
    fn test_layout_child_type_row_offset() {
        let expectations: [(LayoutChildType, Option<u64>); 4] = [
            (LayoutChildType::Chunk((0, 42)), Some(42)),
            (LayoutChildType::Field(FieldName::from("field1")), Some(0)),
            (LayoutChildType::Auxiliary(Arc::from("metadata")), None),
            (LayoutChildType::Transparent(Arc::from("wrapper")), Some(0)),
        ];

        for (child, expected_offset) in &expectations {
            assert_eq!(child.row_offset(), *expected_offset);
        }
    }

    /// Equality takes both the variant and its payload into account.
    #[test]
    fn test_layout_child_type_equality() {
        // Chunks differ when either the index or the offset differs.
        let chunk = LayoutChildType::Chunk((1, 100));
        assert_eq!(chunk, LayoutChildType::Chunk((1, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((2, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((1, 200)));

        // Fields compare by field name.
        let field = LayoutChildType::Field(FieldName::from("name"));
        assert_eq!(field, LayoutChildType::Field(FieldName::from("name")));
        assert_ne!(field, LayoutChildType::Field(FieldName::from("age")));

        // Auxiliary children compare by name.
        let aux = LayoutChildType::Auxiliary(Arc::from("stats"));
        assert_eq!(aux, LayoutChildType::Auxiliary(Arc::from("stats")));
        assert_ne!(aux, LayoutChildType::Auxiliary(Arc::from("index")));

        // Transparent children compare by name.
        let transparent = LayoutChildType::Transparent(Arc::from("enc"));
        assert_eq!(transparent, LayoutChildType::Transparent(Arc::from("enc")));
        assert_ne!(transparent, LayoutChildType::Transparent(Arc::from("dec")));

        // Values of different variants are never equal.
        assert_ne!(chunk, field);
        assert_ne!(field, aux);
        assert_ne!(aux, transparent);
    }

    /// Name and row offset checked together over a range of inputs.
    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        let actual_name = child.name();
        let actual_offset = child.row_offset();

        assert_eq!(actual_name.as_ref(), expected_name);
        assert_eq!(actual_offset, expected_offset);
    }

    /// Chunk names are the bracketed chunk index, and the offset is passed through unchanged.
    #[test]
    fn test_chunk_with_different_indices_and_offsets() {
        let chunks = [(0, 0), (1, 100), (2, 200), (100, 10000)].map(LayoutChildType::Chunk);

        for chunk in &chunks {
            let LayoutChildType::Chunk((idx, offset)) = chunk else {
                unreachable!("only chunk children are constructed above");
            };

            let name = chunk.name();
            assert!(name.starts_with('['));
            assert!(name.ends_with(']'));
            assert_eq!(name.as_ref(), format!("[{idx}]"));
            assert_eq!(chunk.row_offset(), Some(*offset));
        }
    }

    /// Field names are passed through verbatim, whatever characters they contain.
    #[test]
    fn test_field_names_with_special_characters() {
        let special_fields = [
            "field-with-dashes",
            "field_with_underscores",
            "field.with.dots",
            "field::with::colons",
            "field/with/slashes",
            "field@with#symbols",
        ]
        .map(Arc::<str>::from);

        for field_name in special_fields {
            let field = LayoutChildType::Field(field_name.clone().into());
            assert_eq!(field.name(), field_name);
            assert_eq!(field.row_offset(), Some(0));
        }
    }
}