Skip to main content

vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Display;
7use std::fmt::Formatter;
8use std::sync::Arc;
9
10use arcref::ArcRef;
11use itertools::Itertools;
12use vortex_array::SerializeMetadata;
13use vortex_dtype::DType;
14use vortex_dtype::FieldName;
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_err;
18use vortex_session::VortexSession;
19
20use crate::LayoutEncodingId;
21use crate::LayoutEncodingRef;
22use crate::LayoutReaderRef;
23use crate::VTable;
24use crate::display::DisplayLayoutTree;
25use crate::display::display_tree_with_segment_sizes;
26use crate::segments::SegmentId;
27use crate::segments::SegmentSource;
28
/// Identifier type for layouts: an [`ArcRef`] over `str`.
pub type LayoutId = ArcRef<str>;

/// A shared, reference-counted [`Layout`] trait object.
pub type LayoutRef = Arc<dyn Layout>;
32
/// A type-erased layout node.
///
/// A layout exposes its encoding, dtype, row count, children, serialized metadata and
/// segment IDs, and can construct a reader on top of a [`SegmentSource`].
///
/// The trait requires `private::Sealed` as a supertrait; in this file the only implementor
/// is [`LayoutAdapter`].
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as `&dyn Any`, which allows downcasting by reference.
    fn as_any(&self) -> &dyn Any;

    /// Converts an `Arc` of this layout into an `Arc<dyn Any + Send + Sync>`, which allows
    /// downcasting a shared reference.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns this layout as a [`LayoutRef`].
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Get the [`LayoutChildType`] of the child at the given index, describing how that child
    /// relates to this layout. Use [`LayoutChildType::row_offset`] to obtain the child's
    /// relative row offset (`None` for auxiliary children, e.g. dictionary values, zone maps).
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as serialized bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Create a [`LayoutReaderRef`] for this layout, reading from `segment_source` within the
    /// given `session`.
    ///
    /// NOTE(review): `name` is presumably a human-readable label for the reader; confirm
    /// against the `VTable::new_reader` implementations.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
    ) -> VortexResult<LayoutReaderRef>;
}
72
/// Conversion of a value into a type-erased [`LayoutRef`], consuming the value.
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`].
    fn into_layout(self) -> LayoutRef;
}
77
/// A type that allows us to identify how a layout child relates to its parent.
///
/// Every variant can report a name via [`LayoutChildType::name`] and an optional relative
/// row offset via [`LayoutChildType::row_offset`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains the name of the child.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child, in that order.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child.
    Field(FieldName),
}
93
94impl LayoutChildType {
95    /// Returns the name of this child.
96    pub fn name(&self) -> Arc<str> {
97        match self {
98            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
99            LayoutChildType::Auxiliary(name) => name.clone(),
100            LayoutChildType::Transparent(name) => name.clone(),
101            LayoutChildType::Field(name) => name.clone().into(),
102        }
103    }
104
105    /// Returns the relative row offset of this child.
106    /// For auxiliary children, this is `None`.
107    pub fn row_offset(&self) -> Option<u64> {
108        match self {
109            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
110            LayoutChildType::Auxiliary(_) => None,
111            LayoutChildType::Transparent(_) => Some(0),
112            LayoutChildType::Field(_) => Some(0),
113        }
114    }
115}
116
117impl dyn Layout + '_ {
118    /// The ID of the encoding for this layout.
119    pub fn encoding_id(&self) -> LayoutEncodingId {
120        self.encoding().id()
121    }
122
123    /// The children of this layout.
124    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
125        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
126    }
127
128    /// The child types of this layout.
129    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
130        (0..self.nchildren()).map(|i| self.child_type(i))
131    }
132
133    /// The names of the children of this layout.
134    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
135        self.child_types().map(|child| child.name())
136    }
137
138    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
139    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
140        self.child_types().map(|child| child.row_offset())
141    }
142
143    pub fn is<V: VTable>(&self) -> bool {
144        self.as_opt::<V>().is_some()
145    }
146
147    /// Downcast a layout to a specific type.
148    pub fn as_<V: VTable>(&self) -> &V::Layout {
149        self.as_opt::<V>().vortex_expect("Failed to downcast")
150    }
151
152    /// Downcast a layout to a specific type.
153    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
154        self.as_any()
155            .downcast_ref::<LayoutAdapter<V>>()
156            .map(|adapter| &adapter.0)
157    }
158
159    /// Downcast a layout to a specific type.
160    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
161        let layout_adapter = self
162            .as_any_arc()
163            .downcast::<LayoutAdapter<V>>()
164            .map_err(|_| vortex_err!("Invalid layout type"))
165            .vortex_expect("Invalid layout type");
166
167        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
168        // it has the same memory layout as V::Layout. The downcast above ensures we have
169        // the correct type. This transmute is safe because both Arc types point to data
170        // with identical layout and alignment.
171        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
172    }
173
174    /// Depth-first traversal of the layout and its children.
175    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
176        /// A depth-first pre-order iterator over a layout.
177        struct ChildrenIterator {
178            stack: Vec<LayoutRef>,
179        }
180
181        impl Iterator for ChildrenIterator {
182            type Item = VortexResult<LayoutRef>;
183
184            fn next(&mut self) -> Option<Self::Item> {
185                let next = self.stack.pop()?;
186                let Ok(children) = next.children() else {
187                    return Some(Ok(next));
188                };
189                for child in children.into_iter().rev() {
190                    self.stack.push(child);
191                }
192                Some(Ok(next))
193            }
194        }
195
196        ChildrenIterator {
197            stack: vec![self.to_layout()],
198        }
199    }
200
201    /// Display the layout as a tree structure.
202    pub fn display_tree(&self) -> DisplayLayoutTree {
203        DisplayLayoutTree::new(self.to_layout(), false)
204    }
205
206    /// Display the layout as a tree structure with optional verbose metadata.
207    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
208        DisplayLayoutTree::new(self.to_layout(), verbose)
209    }
210
211    /// Display the layout as a tree structure, fetching segment buffer sizes from the segment source.
212    ///
213    /// # Warning
214    ///
215    /// This function performs IO to fetch each segment's buffer. For layouts with
216    /// many segments, this may result in significant IO overhead.
217    pub async fn display_tree_with_segments(
218        &self,
219        segment_source: Arc<dyn SegmentSource>,
220    ) -> VortexResult<DisplayLayoutTree> {
221        display_tree_with_segment_sizes(self.to_layout(), segment_source).await
222    }
223}
224
225/// Display the encoding, dtype, row count, and segment IDs of this layout.
226impl Display for dyn Layout + '_ {
227    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
228        let segment_ids = self.segment_ids();
229        if segment_ids.is_empty() {
230            write!(
231                f,
232                "{}({}, rows={})",
233                self.encoding_id(),
234                self.dtype(),
235                self.row_count()
236            )
237        } else {
238            write!(
239                f,
240                "{}({}, rows={}, segments=[{}])",
241                self.encoding_id(),
242                self.dtype(),
243                self.row_count(),
244                segment_ids
245                    .iter()
246                    .map(|s| format!("{}", **s))
247                    .collect::<Vec<_>>()
248                    .join(", ")
249            )
250        }
251    }
252}
253
/// Newtype wrapper around a `V::Layout` that implements the object-safe [`Layout`] trait by
/// delegating to the functions of the [`VTable`] `V`.
///
/// The `#[repr(transparent)]` attribute is load-bearing: the `Arc` downcast in
/// `<dyn Layout>::into` relies on this type having the same memory layout as `V::Layout`.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
256
257impl<V: VTable> Debug for LayoutAdapter<V> {
258    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
259        self.0.fmt(f)
260    }
261}
262
263impl<V: VTable> Layout for LayoutAdapter<V> {
264    fn as_any(&self) -> &dyn Any {
265        self
266    }
267
268    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
269        self
270    }
271
272    fn to_layout(&self) -> LayoutRef {
273        Arc::new(LayoutAdapter::<V>(self.0.clone()))
274    }
275
276    fn encoding(&self) -> LayoutEncodingRef {
277        V::encoding(&self.0)
278    }
279
280    fn row_count(&self) -> u64 {
281        V::row_count(&self.0)
282    }
283
284    fn dtype(&self) -> &DType {
285        V::dtype(&self.0)
286    }
287
288    fn nchildren(&self) -> usize {
289        V::nchildren(&self.0)
290    }
291
292    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
293        V::child(&self.0, idx)
294    }
295
296    fn child_type(&self, idx: usize) -> LayoutChildType {
297        V::child_type(&self.0, idx)
298    }
299
300    fn metadata(&self) -> Vec<u8> {
301        V::metadata(&self.0).serialize()
302    }
303
304    fn segment_ids(&self) -> Vec<SegmentId> {
305        V::segment_ids(&self.0)
306    }
307
308    fn new_reader(
309        &self,
310        name: Arc<str>,
311        segment_source: Arc<dyn SegmentSource>,
312        session: &VortexSession,
313    ) -> VortexResult<LayoutReaderRef> {
314        V::new_reader(&self.0, name, segment_source, session)
315    }
316}
317
318mod private {
319    use super::*;
320
321    pub trait Sealed {}
322
323    impl<V: VTable> Sealed for LayoutAdapter<V> {}
324}
325
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_array::ArrayContext;

    use super::*;

    /// Every variant reports the expected name.
    #[test]
    fn test_layout_child_type_name() {
        let cases = [
            (LayoutChildType::Chunk((5, 100)), "[5]"),
            (
                LayoutChildType::Field(FieldName::from("customer_id")),
                "customer_id",
            ),
            (LayoutChildType::Auxiliary(Arc::from("zone_map")), "zone_map"),
            (
                LayoutChildType::Transparent(Arc::from("compressed")),
                "compressed",
            ),
        ];

        for (child_type, expected_name) in cases {
            assert_eq!(child_type.name().as_ref(), expected_name);
        }
    }

    /// Chunks report their offset, auxiliary children none, everything else zero.
    #[test]
    fn test_layout_child_type_row_offset() {
        let cases = [
            (LayoutChildType::Chunk((0, 42)), Some(42)),
            (LayoutChildType::Field(FieldName::from("field1")), Some(0)),
            (LayoutChildType::Auxiliary(Arc::from("metadata")), None),
            (LayoutChildType::Transparent(Arc::from("wrapper")), Some(0)),
        ];

        for (child_type, expected_offset) in cases {
            assert_eq!(child_type.row_offset(), expected_offset);
        }
    }

    /// Equality compares both the variant and its payload.
    #[test]
    fn test_layout_child_type_equality() {
        // Chunks differ when either the index or the offset differs.
        let chunk = LayoutChildType::Chunk((1, 100));
        assert_eq!(chunk, LayoutChildType::Chunk((1, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((2, 100)));
        assert_ne!(chunk, LayoutChildType::Chunk((1, 200)));

        // Fields compare by name.
        let field = LayoutChildType::Field(FieldName::from("name"));
        assert_eq!(field, LayoutChildType::Field(FieldName::from("name")));
        assert_ne!(field, LayoutChildType::Field(FieldName::from("age")));

        // Auxiliary children compare by name.
        let aux = LayoutChildType::Auxiliary(Arc::from("stats"));
        assert_eq!(aux, LayoutChildType::Auxiliary(Arc::from("stats")));
        assert_ne!(aux, LayoutChildType::Auxiliary(Arc::from("index")));

        // Transparent children compare by name.
        let transparent = LayoutChildType::Transparent(Arc::from("enc"));
        assert_eq!(transparent, LayoutChildType::Transparent(Arc::from("enc")));
        assert_ne!(transparent, LayoutChildType::Transparent(Arc::from("dec")));

        // Different variants are never equal.
        assert_ne!(chunk, field);
        assert_ne!(field, aux);
        assert_ne!(aux, transparent);
    }

    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child_type: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        let actual_name = child_type.name();
        assert_eq!(&*actual_name, expected_name);
        assert_eq!(child_type.row_offset(), expected_offset);
    }

    /// Chunk names are always the bracketed index, and the offset round-trips.
    #[test]
    fn test_chunk_with_different_indices_and_offsets() {
        let chunks = [
            LayoutChildType::Chunk((0, 0)),
            LayoutChildType::Chunk((1, 100)),
            LayoutChildType::Chunk((2, 200)),
            LayoutChildType::Chunk((100, 10000)),
        ];

        for chunk in &chunks {
            let name = chunk.name();
            assert!(name.starts_with('['));
            assert!(name.ends_with(']'));

            if let LayoutChildType::Chunk((idx, offset)) = chunk {
                assert_eq!(&*name, format!("[{idx}]"));
                assert_eq!(chunk.row_offset(), Some(*offset));
            }
        }
    }

    /// Field names are passed through untouched, whatever characters they contain.
    #[test]
    fn test_field_names_with_special_characters() {
        let special_fields = [
            "field-with-dashes",
            "field_with_underscores",
            "field.with.dots",
            "field::with::colons",
            "field/with/slashes",
            "field@with#symbols",
        ];

        for raw_name in special_fields {
            let field_name: Arc<str> = Arc::from(raw_name);
            let field = LayoutChildType::Field(field_name.clone().into());
            assert_eq!(field.name(), field_name);
            assert_eq!(field.row_offset(), Some(0));
        }
    }

    /// Builds a struct layout of a dict column and a chunked column and checks the
    /// `Display` output of every layout involved.
    #[test]
    fn test_struct_layout_display() {
        use vortex_dtype::Nullability::NonNullable;
        use vortex_dtype::PType;
        use vortex_dtype::StructFields;

        use crate::IntoLayout;
        use crate::layouts::chunked::ChunkedLayout;
        use crate::layouts::dict::DictLayout;
        use crate::layouts::flat::FlatLayout;
        use crate::layouts::struct_::StructLayout;
        use crate::segments::SegmentId;

        let ctx = ArrayContext::empty();

        // Dict values: a flat layout of utf8 strings.
        let dict_values =
            FlatLayout::new(3, DType::Utf8(NonNullable), SegmentId::from(0), ctx.clone())
                .into_layout();

        // A flat layout displays its segment.
        assert_eq!(
            dict_values.to_string(),
            "vortex.flat(utf8, rows=3, segments=[0])"
        );

        // Dict codes: a flat layout of u16.
        let dict_codes = FlatLayout::new(
            10,
            DType::Primitive(PType::U16, NonNullable),
            SegmentId::from(1),
            ctx.clone(),
        )
        .into_layout();

        // A flat layout displays its segment.
        assert_eq!(
            dict_codes.to_string(),
            "vortex.flat(u16, rows=10, segments=[1])"
        );

        // Column "name": a dict layout over the values and codes.
        let dict_layout = DictLayout::new(dict_values.clone(), dict_codes.clone()).into_layout();

        // The dict layout owns no segments directly.
        assert_eq!(dict_layout.to_string(), "vortex.dict(utf8, rows=10)");

        // Two flat chunks of i64.
        let chunk1 = FlatLayout::new(
            5,
            DType::Primitive(PType::I64, NonNullable),
            SegmentId::from(2),
            ctx.clone(),
        )
        .into_layout();

        let chunk2 = FlatLayout::new(
            5,
            DType::Primitive(PType::I64, NonNullable),
            SegmentId::from(3),
            ctx,
        )
        .into_layout();

        // Column "value": a chunked layout over the two chunks.
        let chunked_layout = ChunkedLayout::new(
            10,
            DType::Primitive(PType::I64, NonNullable),
            crate::OwnedLayoutChildren::layout_children(vec![chunk1.clone(), chunk2.clone()]),
        )
        .into_layout();

        // The chunked layout owns no segments directly.
        assert_eq!(chunked_layout.to_string(), "vortex.chunked(i64, rows=10)");

        // Each chunk displays its own segment.
        assert_eq!(
            chunk1.to_string(),
            "vortex.flat(i64, rows=5, segments=[2])"
        );
        assert_eq!(
            chunk2.to_string(),
            "vortex.flat(i64, rows=5, segments=[3])"
        );

        // Struct layout with the two columns as fields.
        let field_names: Vec<Arc<str>> = vec!["name".into(), "value".into()];
        let field_dtypes = vec![
            DType::Utf8(NonNullable),
            DType::Primitive(PType::I64, NonNullable),
        ];
        let struct_dtype = DType::Struct(
            StructFields::new(field_names.into(), field_dtypes),
            NonNullable,
        );

        let struct_layout =
            StructLayout::new(10, struct_dtype, vec![dict_layout, chunked_layout]).into_layout();

        println!("{}", struct_layout.display_tree_verbose(true));

        // The struct layout owns no segments directly.
        assert_eq!(
            struct_layout.to_string(),
            "vortex.struct({name=utf8, value=i64}, rows=10)"
        );
    }
}