Skip to main content

vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Display;
7use std::fmt::Formatter;
8use std::sync::Arc;
9
10use itertools::Itertools;
11use vortex_array::SerializeMetadata;
12use vortex_array::dtype::DType;
13use vortex_array::dtype::FieldName;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_err;
17use vortex_session::VortexSession;
18use vortex_session::registry::Id;
19
20use crate::LayoutEncodingId;
21use crate::LayoutEncodingRef;
22use crate::LayoutReaderRef;
23use crate::VTable;
24use crate::display::DisplayLayoutTree;
25use crate::display::display_tree_with_segment_sizes;
26use crate::segments::SegmentId;
27use crate::segments::SegmentSource;
28
/// A unique identifier for a layout.
pub type LayoutId = Id;

/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
33
/// The type-erased interface of a layout, used through [`LayoutRef`].
///
/// The trait has `private::Sealed` as a supertrait, so it can only be implemented by types
/// that also implement that marker trait (see the `private` module in this file).
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as `&dyn Any`, allowing it to be downcast by reference.
    fn as_any(&self) -> &dyn Any;

    /// Converts a shared layout into `Arc<dyn Any + Send + Sync>`, allowing it to be
    /// downcast by value.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns an owned [`LayoutRef`] for this layout.
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Describes how the child at the given index relates to this layout: a transparent
    /// wrapper, an auxiliary child (e.g. dictionary values, zone maps, etc.), a row-based
    /// chunk, or a field.
    ///
    /// The child's name and relative row offset can be obtained from the result via
    /// [`LayoutChildType::name`] and [`LayoutChildType::row_offset`]; the latter is `None`
    /// for auxiliary children.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Creates a reader for this layout from the given name, segment source and session.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
    ) -> VortexResult<LayoutReaderRef>;
}
73
/// Conversion of a value into a type-erased [`LayoutRef`], consuming the value.
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`].
    fn into_layout(self) -> LayoutRef;
}
78
/// A type that allows us to identify how a layout child relates to its parent.
///
/// Use [`LayoutChildType::name`] to obtain a display name for the child and
/// [`LayoutChildType::row_offset`] to obtain its row offset relative to the parent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains the name of the child.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child.
    Field(FieldName),
}
94
95impl LayoutChildType {
96    /// Returns the name of this child.
97    pub fn name(&self) -> Arc<str> {
98        match self {
99            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
100            LayoutChildType::Auxiliary(name) => Arc::clone(name),
101            LayoutChildType::Transparent(name) => Arc::clone(name),
102            LayoutChildType::Field(name) => name.clone().into(),
103        }
104    }
105
106    /// Returns the relative row offset of this child.
107    /// For auxiliary children, this is `None`.
108    pub fn row_offset(&self) -> Option<u64> {
109        match self {
110            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
111            LayoutChildType::Auxiliary(_) => None,
112            LayoutChildType::Transparent(_) => Some(0),
113            LayoutChildType::Field(_) => Some(0),
114        }
115    }
116}
117
118impl dyn Layout + '_ {
119    /// The ID of the encoding for this layout.
120    pub fn encoding_id(&self) -> LayoutEncodingId {
121        self.encoding().id()
122    }
123
124    /// The children of this layout.
125    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
126        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
127    }
128
129    /// The child types of this layout.
130    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
131        (0..self.nchildren()).map(|i| self.child_type(i))
132    }
133
134    /// The names of the children of this layout.
135    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
136        self.child_types().map(|child| child.name())
137    }
138
139    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
140    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
141        self.child_types().map(|child| child.row_offset())
142    }
143
144    pub fn is<V: VTable>(&self) -> bool {
145        self.as_opt::<V>().is_some()
146    }
147
148    /// Downcast a layout to a specific type.
149    pub fn as_<V: VTable>(&self) -> &V::Layout {
150        self.as_opt::<V>().vortex_expect("Failed to downcast")
151    }
152
153    /// Downcast a layout to a specific type.
154    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
155        self.as_any()
156            .downcast_ref::<LayoutAdapter<V>>()
157            .map(|adapter| &adapter.0)
158    }
159
160    /// Downcast a layout to a specific type.
161    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
162        let layout_adapter = self
163            .as_any_arc()
164            .downcast::<LayoutAdapter<V>>()
165            .map_err(|_| vortex_err!("Invalid layout type"))
166            .vortex_expect("Invalid layout type");
167
168        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
169        // it has the same memory layout as V::Layout. The downcast above ensures we have
170        // the correct type. This transmute is safe because both Arc types point to data
171        // with identical layout and alignment.
172        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
173    }
174
175    /// Depth-first traversal of the layout and its children.
176    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
177        /// A depth-first pre-order iterator over a layout.
178        struct ChildrenIterator {
179            stack: Vec<LayoutRef>,
180        }
181
182        impl Iterator for ChildrenIterator {
183            type Item = VortexResult<LayoutRef>;
184
185            fn next(&mut self) -> Option<Self::Item> {
186                let next = self.stack.pop()?;
187                let Ok(children) = next.children() else {
188                    return Some(Ok(next));
189                };
190                for child in children.into_iter().rev() {
191                    self.stack.push(child);
192                }
193                Some(Ok(next))
194            }
195        }
196
197        ChildrenIterator {
198            stack: vec![self.to_layout()],
199        }
200    }
201
202    /// Display the layout as a tree structure.
203    pub fn display_tree(&self) -> DisplayLayoutTree {
204        DisplayLayoutTree::new(self.to_layout(), false)
205    }
206
207    /// Display the layout as a tree structure with optional verbose metadata.
208    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
209        DisplayLayoutTree::new(self.to_layout(), verbose)
210    }
211
212    /// Display the layout as a tree structure, fetching segment buffer sizes from the segment source.
213    ///
214    /// # Warning
215    ///
216    /// This function performs IO to fetch each segment's buffer. For layouts with
217    /// many segments, this may result in significant IO overhead.
218    pub async fn display_tree_with_segments(
219        &self,
220        segment_source: Arc<dyn SegmentSource>,
221    ) -> VortexResult<DisplayLayoutTree> {
222        display_tree_with_segment_sizes(self.to_layout(), segment_source).await
223    }
224}
225
226/// Display the encoding, dtype, row count, and segment IDs of this layout.
227impl Display for dyn Layout + '_ {
228    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
229        let segment_ids = self.segment_ids();
230        if segment_ids.is_empty() {
231            write!(
232                f,
233                "{}({}, rows={})",
234                self.encoding_id(),
235                self.dtype(),
236                self.row_count()
237            )
238        } else {
239            write!(
240                f,
241                "{}({}, rows={}, segments=[{}])",
242                self.encoding_id(),
243                self.dtype(),
244                self.row_count(),
245                segment_ids
246                    .iter()
247                    .map(|s| format!("{}", **s))
248                    .collect::<Vec<_>>()
249                    .join(", ")
250            )
251        }
252    }
253}
254
/// A newtype around a [`VTable`]'s layout type that implements the sealed [`Layout`] trait by
/// delegating to the functions of `V`.
///
/// The `#[repr(transparent)]` attribute is load-bearing: the by-value downcast on
/// `dyn Layout` (`into`) reinterprets an `Arc<LayoutAdapter<V>>` as an `Arc<V::Layout>`,
/// which is only sound while this struct has exactly the layout of its single field.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
257
258impl<V: VTable> Debug for LayoutAdapter<V> {
259    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
260        self.0.fmt(f)
261    }
262}
263
/// Implements [`Layout`] for the adapter by forwarding each method to the corresponding
/// function on `V`, passing the wrapped layout (`self.0`).
impl<V: VTable> Layout for LayoutAdapter<V> {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }

    fn to_layout(&self) -> LayoutRef {
        // Clones the wrapped layout and places the clone in a freshly allocated `Arc`.
        Arc::new(LayoutAdapter::<V>(self.0.clone()))
    }

    fn encoding(&self) -> LayoutEncodingRef {
        V::encoding(&self.0)
    }

    fn row_count(&self) -> u64 {
        V::row_count(&self.0)
    }

    fn dtype(&self) -> &DType {
        V::dtype(&self.0)
    }

    fn nchildren(&self) -> usize {
        V::nchildren(&self.0)
    }

    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
        V::child(&self.0, idx)
    }

    fn child_type(&self, idx: usize) -> LayoutChildType {
        V::child_type(&self.0, idx)
    }

    fn metadata(&self) -> Vec<u8> {
        // `V::metadata` returns a typed value; it is serialized to bytes here.
        V::metadata(&self.0).serialize()
    }

    fn segment_ids(&self) -> Vec<SegmentId> {
        V::segment_ids(&self.0)
    }

    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
    ) -> VortexResult<LayoutReaderRef> {
        V::new_reader(&self.0, name, segment_source, session)
    }
}
318
/// Seals the [`Layout`] trait.
///
/// `Layout` has `private::Sealed` as a supertrait and this module is not public, so code
/// outside this module cannot name `Sealed` and therefore cannot implement `Layout`.
mod private {
    use super::*;
    use crate::layouts::foreign::ForeignLayout;

    /// Marker supertrait of [`Layout`]; it has no methods.
    pub trait Sealed {}

    // The types that are permitted to implement `Layout`.
    impl<V: VTable> Sealed for LayoutAdapter<V> {}
    impl Sealed for ForeignLayout {}
}
328
329#[cfg(test)]
330mod tests {
331    use rstest::rstest;
332    use vortex_session::registry::ReadContext;
333
334    use super::*;
335
336    #[test]
337    fn test_layout_child_type_name() {
338        // Test Chunk variant
339        let chunk = LayoutChildType::Chunk((5, 100));
340        assert_eq!(chunk.name().as_ref(), "[5]");
341
342        // Test Field variant
343        let field = LayoutChildType::Field(FieldName::from("customer_id"));
344        assert_eq!(field.name().as_ref(), "customer_id");
345
346        // Test Auxiliary variant
347        let aux = LayoutChildType::Auxiliary(Arc::from("zone_map"));
348        assert_eq!(aux.name().as_ref(), "zone_map");
349
350        // Test Transparent variant
351        let transparent = LayoutChildType::Transparent(Arc::from("compressed"));
352        assert_eq!(transparent.name().as_ref(), "compressed");
353    }
354
355    #[test]
356    fn test_layout_child_type_row_offset() {
357        // Chunk should return the offset
358        let chunk = LayoutChildType::Chunk((0, 42));
359        assert_eq!(chunk.row_offset(), Some(42));
360
361        // Field should return 0
362        let field = LayoutChildType::Field(FieldName::from("field1"));
363        assert_eq!(field.row_offset(), Some(0));
364
365        // Auxiliary should return None
366        let aux = LayoutChildType::Auxiliary(Arc::from("metadata"));
367        assert_eq!(aux.row_offset(), None);
368
369        // Transparent should return 0
370        let transparent = LayoutChildType::Transparent(Arc::from("wrapper"));
371        assert_eq!(transparent.row_offset(), Some(0));
372    }
373
374    #[test]
375    fn test_layout_child_type_equality() {
376        // Test Chunk equality
377        let chunk1 = LayoutChildType::Chunk((1, 100));
378        let chunk2 = LayoutChildType::Chunk((1, 100));
379        let chunk3 = LayoutChildType::Chunk((2, 100));
380        let chunk4 = LayoutChildType::Chunk((1, 200));
381
382        assert_eq!(chunk1, chunk2);
383        assert_ne!(chunk1, chunk3);
384        assert_ne!(chunk1, chunk4);
385
386        // Test Field equality
387        let field1 = LayoutChildType::Field(FieldName::from("name"));
388        let field2 = LayoutChildType::Field(FieldName::from("name"));
389        let field3 = LayoutChildType::Field(FieldName::from("age"));
390
391        assert_eq!(field1, field2);
392        assert_ne!(field1, field3);
393
394        // Test Auxiliary equality
395        let aux1 = LayoutChildType::Auxiliary(Arc::from("stats"));
396        let aux2 = LayoutChildType::Auxiliary(Arc::from("stats"));
397        let aux3 = LayoutChildType::Auxiliary(Arc::from("index"));
398
399        assert_eq!(aux1, aux2);
400        assert_ne!(aux1, aux3);
401
402        // Test Transparent equality
403        let trans1 = LayoutChildType::Transparent(Arc::from("enc"));
404        let trans2 = LayoutChildType::Transparent(Arc::from("enc"));
405        let trans3 = LayoutChildType::Transparent(Arc::from("dec"));
406
407        assert_eq!(trans1, trans2);
408        assert_ne!(trans1, trans3);
409
410        // Test cross-variant inequality
411        assert_ne!(chunk1, field1);
412        assert_ne!(field1, aux1);
413        assert_ne!(aux1, trans1);
414    }
415
    // Table-driven check of `name()` and `row_offset()` for every variant. The cases include
    // boundary inputs: a zero chunk, a large chunk index/offset, an empty field name and a
    // long field name.
    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child_type: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        assert_eq!(child_type.name().as_ref(), expected_name);
        assert_eq!(child_type.row_offset(), expected_offset);
    }
435
436    #[test]
437    fn test_chunk_with_different_indices_and_offsets() {
438        let chunks = [
439            LayoutChildType::Chunk((0, 0)),
440            LayoutChildType::Chunk((1, 100)),
441            LayoutChildType::Chunk((2, 200)),
442            LayoutChildType::Chunk((100, 10000)),
443        ];
444
445        for chunk in chunks.iter() {
446            let name = chunk.name();
447            assert!(name.starts_with('['));
448            assert!(name.ends_with(']'));
449
450            if let LayoutChildType::Chunk((idx, offset)) = chunk {
451                assert_eq!(name.as_ref(), format!("[{}]", idx));
452                assert_eq!(chunk.row_offset(), Some(*offset));
453            }
454        }
455    }
456
457    #[test]
458    fn test_field_names_with_special_characters() {
459        let special_fields: Vec<Arc<str>> = vec![
460            Arc::from("field-with-dashes"),
461            Arc::from("field_with_underscores"),
462            Arc::from("field.with.dots"),
463            Arc::from("field::with::colons"),
464            Arc::from("field/with/slashes"),
465            Arc::from("field@with#symbols"),
466        ];
467
468        for field_name in special_fields {
469            let field = LayoutChildType::Field(Arc::clone(&field_name).into());
470            assert_eq!(field.name(), field_name);
471            assert_eq!(field.row_offset(), Some(0));
472        }
473    }
474
475    #[test]
476    fn test_struct_layout_display() {
477        use vortex_array::dtype::Nullability::NonNullable;
478        use vortex_array::dtype::PType;
479        use vortex_array::dtype::StructFields;
480
481        use crate::IntoLayout;
482        use crate::layouts::chunked::ChunkedLayout;
483        use crate::layouts::dict::DictLayout;
484        use crate::layouts::flat::FlatLayout;
485        use crate::layouts::struct_::StructLayout;
486        use crate::segments::SegmentId;
487
488        let ctx = ReadContext::new([]);
489
490        // Create a flat layout for dict values (utf8 strings)
491        let dict_values =
492            FlatLayout::new(3, DType::Utf8(NonNullable), SegmentId::from(0), ctx.clone())
493                .into_layout();
494
495        // Test flat layout display shows segment
496        assert_eq!(
497            format!("{}", dict_values),
498            "vortex.flat(utf8, rows=3, segments=[0])"
499        );
500
501        // Create a flat layout for dict codes
502        let dict_codes = FlatLayout::new(
503            10,
504            DType::Primitive(PType::U16, NonNullable),
505            SegmentId::from(1),
506            ctx.clone(),
507        )
508        .into_layout();
509
510        // Test flat layout display shows segment
511        assert_eq!(
512            format!("{}", dict_codes),
513            "vortex.flat(u16, rows=10, segments=[1])"
514        );
515
516        // Create dict layout (column "name")
517        let dict_layout =
518            DictLayout::new(Arc::clone(&dict_values), Arc::clone(&dict_codes)).into_layout();
519
520        // Test dict layout display (no direct segments)
521        assert_eq!(format!("{}", dict_layout), "vortex.dict(utf8, rows=10)");
522
523        // Create flat layouts for chunks
524        let chunk1 = FlatLayout::new(
525            5,
526            DType::Primitive(PType::I64, NonNullable),
527            SegmentId::from(2),
528            ctx.clone(),
529        )
530        .into_layout();
531
532        let chunk2 = FlatLayout::new(
533            5,
534            DType::Primitive(PType::I64, NonNullable),
535            SegmentId::from(3),
536            ctx,
537        )
538        .into_layout();
539
540        // Create chunked layout (column "value")
541        let chunked_layout = ChunkedLayout::new(
542            10,
543            DType::Primitive(PType::I64, NonNullable),
544            crate::OwnedLayoutChildren::layout_children(vec![
545                Arc::clone(&chunk1),
546                Arc::clone(&chunk2),
547            ]),
548        )
549        .into_layout();
550
551        // Test chunked layout display (no direct segments)
552        assert_eq!(
553            format!("{}", chunked_layout),
554            "vortex.chunked(i64, rows=10)"
555        );
556
557        // Test chunk displays show segments
558        assert_eq!(
559            format!("{}", chunk1),
560            "vortex.flat(i64, rows=5, segments=[2])"
561        );
562        assert_eq!(
563            format!("{}", chunk2),
564            "vortex.flat(i64, rows=5, segments=[3])"
565        );
566
567        // Create struct layout with two fields
568        let field_names: Vec<Arc<str>> = vec!["name".into(), "value".into()];
569        let struct_dtype = DType::Struct(
570            StructFields::new(
571                field_names.into(),
572                vec![
573                    DType::Utf8(NonNullable),
574                    DType::Primitive(PType::I64, NonNullable),
575                ],
576            ),
577            NonNullable,
578        );
579
580        let struct_layout =
581            StructLayout::new(10, struct_dtype, vec![dict_layout, chunked_layout]).into_layout();
582
583        println!("{}", struct_layout.display_tree_verbose(true));
584
585        // Test Display impl for struct (no direct segments)
586        assert_eq!(
587            format!("{}", struct_layout),
588            "vortex.struct({name=utf8, value=i64}, rows=10)"
589        );
590    }
591}