vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Display;
7use std::fmt::Formatter;
8use std::sync::Arc;
9
10use arcref::ArcRef;
11use itertools::Itertools;
12use vortex_array::SerializeMetadata;
13use vortex_dtype::DType;
14use vortex_dtype::FieldName;
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_err;
18use vortex_session::VortexSession;
19
20use crate::LayoutEncodingId;
21use crate::LayoutEncodingRef;
22use crate::LayoutReaderRef;
23use crate::VTable;
24use crate::display::DisplayLayoutTree;
25use crate::display::display_tree_with_segment_sizes;
26use crate::segments::SegmentId;
27use crate::segments::SegmentSource;
28
/// Identifier alias for layouts, backed by an [`ArcRef<str>`].
pub type LayoutId = ArcRef<str>;

/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
32
/// The type-erased interface shared by every layout.
///
/// The trait is sealed through `private::Sealed`; within this file the only implementor is
/// [`LayoutAdapter`], which forwards each method to a [`VTable`] implementation.
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as `&dyn Any`, which is what by-reference downcasting uses.
    fn as_any(&self) -> &dyn Any;

    /// Converts a shared layout into `Arc<dyn Any + Send + Sync>`, which is what downcasting
    /// an owned `Arc` uses.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns this layout as a [`LayoutRef`].
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Describes how the child at the given index relates to this layout.
    ///
    /// The relative row offset of the child is available through
    /// [`LayoutChildType::row_offset`], which returns `None` for any auxiliary children,
    /// e.g. dictionary values, zone maps, etc.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as serialized bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Creates a GPU reader for this layout from the given name, segment source and CUDA
    /// context.
    ///
    /// Only compiled when the `gpu_unstable` cfg is set.
    #[cfg(gpu_unstable)]
    fn new_gpu_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        ctx: Arc<cudarc::driver::CudaContext>,
    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef>;

    /// Creates a [`LayoutReaderRef`] for this layout from the given name, segment source and
    /// session.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
    ) -> VortexResult<LayoutReaderRef>;
}
80
/// Conversion of an owned value into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`].
    fn into_layout(self) -> LayoutRef;
}
85
86/// A type that allows us to identify how a layout child relates to its parent.
87#[derive(Debug, Clone, PartialEq, Eq)]
88pub enum LayoutChildType {
89    /// A layout child that retains the same schema and row offset position in the dataset.
90    Transparent(Arc<str>),
91    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
92    /// Contains a human-readable name of the child.
93    Auxiliary(Arc<str>),
94    /// A layout child that represents a row-based chunk of data.
95    /// Contains the chunk index and relative row offset of the child.
96    Chunk((usize, u64)),
97    /// A layout child that represents a single field of data.
98    /// Contains the field name of the child.
99    Field(FieldName),
100}
101
102impl LayoutChildType {
103    /// Returns the name of this child.
104    pub fn name(&self) -> Arc<str> {
105        match self {
106            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
107            LayoutChildType::Auxiliary(name) => name.clone(),
108            LayoutChildType::Transparent(name) => name.clone(),
109            LayoutChildType::Field(name) => name.clone().into(),
110        }
111    }
112
113    /// Returns the relative row offset of this child.
114    /// For auxiliary children, this is `None`.
115    pub fn row_offset(&self) -> Option<u64> {
116        match self {
117            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
118            LayoutChildType::Auxiliary(_) => None,
119            LayoutChildType::Transparent(_) => Some(0),
120            LayoutChildType::Field(_) => Some(0),
121        }
122    }
123}
124
125impl dyn Layout + '_ {
126    /// The ID of the encoding for this layout.
127    pub fn encoding_id(&self) -> LayoutEncodingId {
128        self.encoding().id()
129    }
130
131    /// The children of this layout.
132    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
133        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
134    }
135
136    /// The child types of this layout.
137    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
138        (0..self.nchildren()).map(|i| self.child_type(i))
139    }
140
141    /// The names of the children of this layout.
142    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
143        self.child_types().map(|child| child.name())
144    }
145
146    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
147    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
148        self.child_types().map(|child| child.row_offset())
149    }
150
151    pub fn is<V: VTable>(&self) -> bool {
152        self.as_opt::<V>().is_some()
153    }
154
155    /// Downcast a layout to a specific type.
156    pub fn as_<V: VTable>(&self) -> &V::Layout {
157        self.as_opt::<V>().vortex_expect("Failed to downcast")
158    }
159
160    /// Downcast a layout to a specific type.
161    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
162        self.as_any()
163            .downcast_ref::<LayoutAdapter<V>>()
164            .map(|adapter| &adapter.0)
165    }
166
167    /// Downcast a layout to a specific type.
168    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
169        let layout_adapter = self
170            .as_any_arc()
171            .downcast::<LayoutAdapter<V>>()
172            .map_err(|_| vortex_err!("Invalid layout type"))
173            .vortex_expect("Invalid layout type");
174
175        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
176        // it has the same memory layout as V::Layout. The downcast above ensures we have
177        // the correct type. This transmute is safe because both Arc types point to data
178        // with identical layout and alignment.
179        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
180    }
181
182    /// Depth-first traversal of the layout and its children.
183    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
184        /// A depth-first pre-order iterator over a layout.
185        struct ChildrenIterator {
186            stack: Vec<LayoutRef>,
187        }
188
189        impl Iterator for ChildrenIterator {
190            type Item = VortexResult<LayoutRef>;
191
192            fn next(&mut self) -> Option<Self::Item> {
193                let next = self.stack.pop()?;
194                let Ok(children) = next.children() else {
195                    return Some(Ok(next));
196                };
197                for child in children.into_iter().rev() {
198                    self.stack.push(child);
199                }
200                Some(Ok(next))
201            }
202        }
203
204        ChildrenIterator {
205            stack: vec![self.to_layout()],
206        }
207    }
208
209    /// Display the layout as a tree structure.
210    pub fn display_tree(&self) -> DisplayLayoutTree {
211        DisplayLayoutTree::new(self.to_layout(), false)
212    }
213
214    /// Display the layout as a tree structure with optional verbose metadata.
215    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
216        DisplayLayoutTree::new(self.to_layout(), verbose)
217    }
218
219    /// Display the layout as a tree structure, fetching segment buffer sizes from the segment source.
220    ///
221    /// # Warning
222    ///
223    /// This function performs IO to fetch each segment's buffer. For layouts with
224    /// many segments, this may result in significant IO overhead.
225    pub async fn display_tree_with_segments(
226        &self,
227        segment_source: Arc<dyn SegmentSource>,
228    ) -> VortexResult<DisplayLayoutTree> {
229        display_tree_with_segment_sizes(self.to_layout(), segment_source).await
230    }
231}
232
233/// Display the encoding, dtype, row count, and segment IDs of this layout.
234impl Display for dyn Layout + '_ {
235    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
236        let segment_ids = self.segment_ids();
237        if segment_ids.is_empty() {
238            write!(
239                f,
240                "{}({}, rows={})",
241                self.encoding_id(),
242                self.dtype(),
243                self.row_count()
244            )
245        } else {
246            write!(
247                f,
248                "{}({}, rows={}, segments=[{}])",
249                self.encoding_id(),
250                self.dtype(),
251                self.row_count(),
252                segment_ids
253                    .iter()
254                    .map(|s| format!("{}", **s))
255                    .collect::<Vec<_>>()
256                    .join(", ")
257            )
258        }
259    }
260}
261
262#[repr(transparent)]
263pub struct LayoutAdapter<V: VTable>(V::Layout);
264
265impl<V: VTable> Debug for LayoutAdapter<V> {
266    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
267        self.0.fmt(f)
268    }
269}
270
271impl<V: VTable> Layout for LayoutAdapter<V> {
272    fn as_any(&self) -> &dyn Any {
273        self
274    }
275
276    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
277        self
278    }
279
280    fn to_layout(&self) -> LayoutRef {
281        Arc::new(LayoutAdapter::<V>(self.0.clone()))
282    }
283
284    fn encoding(&self) -> LayoutEncodingRef {
285        V::encoding(&self.0)
286    }
287
288    fn row_count(&self) -> u64 {
289        V::row_count(&self.0)
290    }
291
292    fn dtype(&self) -> &DType {
293        V::dtype(&self.0)
294    }
295
296    fn nchildren(&self) -> usize {
297        V::nchildren(&self.0)
298    }
299
300    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
301        V::child(&self.0, idx)
302    }
303
304    fn child_type(&self, idx: usize) -> LayoutChildType {
305        V::child_type(&self.0, idx)
306    }
307
308    fn metadata(&self) -> Vec<u8> {
309        V::metadata(&self.0).serialize()
310    }
311
312    fn segment_ids(&self) -> Vec<SegmentId> {
313        V::segment_ids(&self.0)
314    }
315
316    #[cfg(gpu_unstable)]
317    fn new_gpu_reader(
318        &self,
319        name: Arc<str>,
320        segment_source: Arc<dyn SegmentSource>,
321        ctx: Arc<cudarc::driver::CudaContext>,
322    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
323        V::new_gpu_reader(&self.0, name, segment_source, ctx)
324    }
325
326    fn new_reader(
327        &self,
328        name: Arc<str>,
329        segment_source: Arc<dyn SegmentSource>,
330        session: &VortexSession,
331    ) -> VortexResult<LayoutReaderRef> {
332        V::new_reader(&self.0, name, segment_source, session)
333    }
334}
335
336mod private {
337    use super::*;
338
339    pub trait Sealed {}
340
341    impl<V: VTable> Sealed for LayoutAdapter<V> {}
342}
343
344#[cfg(test)]
345mod tests {
346    use rstest::rstest;
347
348    use super::*;
349
350    #[test]
351    fn test_layout_child_type_name() {
352        // Test Chunk variant
353        let chunk = LayoutChildType::Chunk((5, 100));
354        assert_eq!(chunk.name().as_ref(), "[5]");
355
356        // Test Field variant
357        let field = LayoutChildType::Field(FieldName::from("customer_id"));
358        assert_eq!(field.name().as_ref(), "customer_id");
359
360        // Test Auxiliary variant
361        let aux = LayoutChildType::Auxiliary(Arc::from("zone_map"));
362        assert_eq!(aux.name().as_ref(), "zone_map");
363
364        // Test Transparent variant
365        let transparent = LayoutChildType::Transparent(Arc::from("compressed"));
366        assert_eq!(transparent.name().as_ref(), "compressed");
367    }
368
369    #[test]
370    fn test_layout_child_type_row_offset() {
371        // Chunk should return the offset
372        let chunk = LayoutChildType::Chunk((0, 42));
373        assert_eq!(chunk.row_offset(), Some(42));
374
375        // Field should return 0
376        let field = LayoutChildType::Field(FieldName::from("field1"));
377        assert_eq!(field.row_offset(), Some(0));
378
379        // Auxiliary should return None
380        let aux = LayoutChildType::Auxiliary(Arc::from("metadata"));
381        assert_eq!(aux.row_offset(), None);
382
383        // Transparent should return 0
384        let transparent = LayoutChildType::Transparent(Arc::from("wrapper"));
385        assert_eq!(transparent.row_offset(), Some(0));
386    }
387
388    #[test]
389    fn test_layout_child_type_equality() {
390        // Test Chunk equality
391        let chunk1 = LayoutChildType::Chunk((1, 100));
392        let chunk2 = LayoutChildType::Chunk((1, 100));
393        let chunk3 = LayoutChildType::Chunk((2, 100));
394        let chunk4 = LayoutChildType::Chunk((1, 200));
395
396        assert_eq!(chunk1, chunk2);
397        assert_ne!(chunk1, chunk3);
398        assert_ne!(chunk1, chunk4);
399
400        // Test Field equality
401        let field1 = LayoutChildType::Field(FieldName::from("name"));
402        let field2 = LayoutChildType::Field(FieldName::from("name"));
403        let field3 = LayoutChildType::Field(FieldName::from("age"));
404
405        assert_eq!(field1, field2);
406        assert_ne!(field1, field3);
407
408        // Test Auxiliary equality
409        let aux1 = LayoutChildType::Auxiliary(Arc::from("stats"));
410        let aux2 = LayoutChildType::Auxiliary(Arc::from("stats"));
411        let aux3 = LayoutChildType::Auxiliary(Arc::from("index"));
412
413        assert_eq!(aux1, aux2);
414        assert_ne!(aux1, aux3);
415
416        // Test Transparent equality
417        let trans1 = LayoutChildType::Transparent(Arc::from("enc"));
418        let trans2 = LayoutChildType::Transparent(Arc::from("enc"));
419        let trans3 = LayoutChildType::Transparent(Arc::from("dec"));
420
421        assert_eq!(trans1, trans2);
422        assert_ne!(trans1, trans3);
423
424        // Test cross-variant inequality
425        assert_ne!(chunk1, field1);
426        assert_ne!(field1, aux1);
427        assert_ne!(aux1, trans1);
428    }
429
430    #[rstest]
431    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
432    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
433    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
434    #[case(
435        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
436        "very_long_field_name_that_is_quite_lengthy",
437        Some(0)
438    )]
439    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
440    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
441    fn test_layout_child_type_parameterized(
442        #[case] child_type: LayoutChildType,
443        #[case] expected_name: &str,
444        #[case] expected_offset: Option<u64>,
445    ) {
446        assert_eq!(child_type.name().as_ref(), expected_name);
447        assert_eq!(child_type.row_offset(), expected_offset);
448    }
449
450    #[test]
451    fn test_chunk_with_different_indices_and_offsets() {
452        let chunks = [
453            LayoutChildType::Chunk((0, 0)),
454            LayoutChildType::Chunk((1, 100)),
455            LayoutChildType::Chunk((2, 200)),
456            LayoutChildType::Chunk((100, 10000)),
457        ];
458
459        for chunk in chunks.iter() {
460            let name = chunk.name();
461            assert!(name.starts_with('['));
462            assert!(name.ends_with(']'));
463
464            if let LayoutChildType::Chunk((idx, offset)) = chunk {
465                assert_eq!(name.as_ref(), format!("[{}]", idx));
466                assert_eq!(chunk.row_offset(), Some(*offset));
467            }
468        }
469    }
470
471    #[test]
472    fn test_field_names_with_special_characters() {
473        let special_fields: Vec<Arc<str>> = vec![
474            Arc::from("field-with-dashes"),
475            Arc::from("field_with_underscores"),
476            Arc::from("field.with.dots"),
477            Arc::from("field::with::colons"),
478            Arc::from("field/with/slashes"),
479            Arc::from("field@with#symbols"),
480        ];
481
482        for field_name in special_fields {
483            let field = LayoutChildType::Field(field_name.clone().into());
484            assert_eq!(field.name(), field_name);
485            assert_eq!(field.row_offset(), Some(0));
486        }
487    }
488
489    #[test]
490    fn test_struct_layout_display() {
491        use vortex_array::ArrayContext;
492        use vortex_dtype::Nullability::NonNullable;
493        use vortex_dtype::PType;
494        use vortex_dtype::StructFields;
495
496        use crate::IntoLayout;
497        use crate::layouts::chunked::ChunkedLayout;
498        use crate::layouts::dict::DictLayout;
499        use crate::layouts::flat::FlatLayout;
500        use crate::layouts::struct_::StructLayout;
501        use crate::segments::SegmentId;
502
503        let ctx = ArrayContext::empty();
504
505        // Create a flat layout for dict values (utf8 strings)
506        let dict_values =
507            FlatLayout::new(3, DType::Utf8(NonNullable), SegmentId::from(0), ctx.clone())
508                .into_layout();
509
510        // Test flat layout display shows segment
511        assert_eq!(
512            format!("{}", dict_values),
513            "vortex.flat(utf8, rows=3, segments=[0])"
514        );
515
516        // Create a flat layout for dict codes
517        let dict_codes = FlatLayout::new(
518            10,
519            DType::Primitive(PType::U16, NonNullable),
520            SegmentId::from(1),
521            ctx.clone(),
522        )
523        .into_layout();
524
525        // Test flat layout display shows segment
526        assert_eq!(
527            format!("{}", dict_codes),
528            "vortex.flat(u16, rows=10, segments=[1])"
529        );
530
531        // Create dict layout (column "name")
532        let dict_layout = DictLayout::new(dict_values.clone(), dict_codes.clone()).into_layout();
533
534        // Test dict layout display (no direct segments)
535        assert_eq!(format!("{}", dict_layout), "vortex.dict(utf8, rows=10)");
536
537        // Create flat layouts for chunks
538        let chunk1 = FlatLayout::new(
539            5,
540            DType::Primitive(PType::I64, NonNullable),
541            SegmentId::from(2),
542            ctx.clone(),
543        )
544        .into_layout();
545
546        let chunk2 = FlatLayout::new(
547            5,
548            DType::Primitive(PType::I64, NonNullable),
549            SegmentId::from(3),
550            ctx,
551        )
552        .into_layout();
553
554        // Create chunked layout (column "value")
555        let chunked_layout = ChunkedLayout::new(
556            10,
557            DType::Primitive(PType::I64, NonNullable),
558            crate::OwnedLayoutChildren::layout_children(vec![chunk1.clone(), chunk2.clone()]),
559        )
560        .into_layout();
561
562        // Test chunked layout display (no direct segments)
563        assert_eq!(
564            format!("{}", chunked_layout),
565            "vortex.chunked(i64, rows=10)"
566        );
567
568        // Test chunk displays show segments
569        assert_eq!(
570            format!("{}", chunk1),
571            "vortex.flat(i64, rows=5, segments=[2])"
572        );
573        assert_eq!(
574            format!("{}", chunk2),
575            "vortex.flat(i64, rows=5, segments=[3])"
576        );
577
578        // Create struct layout with two fields
579        let field_names: Vec<Arc<str>> = vec!["name".into(), "value".into()];
580        let struct_dtype = DType::Struct(
581            StructFields::new(
582                field_names.into(),
583                vec![
584                    DType::Utf8(NonNullable),
585                    DType::Primitive(PType::I64, NonNullable),
586                ],
587            ),
588            NonNullable,
589        );
590
591        let struct_layout =
592            StructLayout::new(10, struct_dtype, vec![dict_layout, chunked_layout]).into_layout();
593
594        println!("{}", struct_layout.display_tree_verbose(true));
595
596        // Test Display impl for struct (no direct segments)
597        assert_eq!(
598            format!("{}", struct_layout),
599            "vortex.struct({name=utf8, value=i64}, rows=10)"
600        );
601    }
602}