Skip to main content

vortex_layout/
layout.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Display;
7use std::fmt::Formatter;
8use std::sync::Arc;
9
10use itertools::Itertools;
11use vortex_array::SerializeMetadata;
12use vortex_array::dtype::DType;
13use vortex_array::dtype::FieldName;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_err;
17use vortex_session::VortexSession;
18use vortex_session::registry::Id;
19
20use crate::LayoutEncodingId;
21use crate::LayoutEncodingRef;
22use crate::LayoutReaderContext;
23use crate::LayoutReaderRef;
24use crate::VTable;
25use crate::display::DisplayLayoutTree;
26use crate::display::display_tree_with_segment_sizes;
27use crate::segments::SegmentId;
28use crate::segments::SegmentSource;
29
/// A unique identifier for a layout.
pub type LayoutId = Id;

/// A shared, reference-counted handle to a type-erased [`Layout`].
pub type LayoutRef = Arc<dyn Layout>;
34
/// A type-erased node of a layout tree.
///
/// The trait requires the crate-private `private::Sealed` marker as a supertrait, so only
/// types that implement that marker can implement `Layout`.
pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
    /// Returns this layout as `&dyn Any`, which is what the by-reference downcast helpers
    /// on `dyn Layout` operate on.
    fn as_any(&self) -> &dyn Any;

    /// Converts a shared handle to this layout into `Arc<dyn Any + Send + Sync>`, which is
    /// what the by-value downcast helper on `dyn Layout` operates on.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns a [`LayoutRef`] for this layout.
    fn to_layout(&self) -> LayoutRef;

    /// Returns the [`crate::LayoutEncoding`] for this layout.
    fn encoding(&self) -> LayoutEncodingRef;

    /// The number of rows in this layout.
    fn row_count(&self) -> u64;

    /// The dtype of this layout when projected with the root scope.
    fn dtype(&self) -> &DType;

    /// The number of children in this layout.
    fn nchildren(&self) -> usize;

    /// Get the child at the given index.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef>;

    /// Describe how the child at the given index relates to this layout.
    ///
    /// The returned [`LayoutChildType`] carries the child's name and, through
    /// [`LayoutChildType::row_offset`], its relative row offset, which is `None` for
    /// any auxiliary children, e.g. dictionary values, zone maps, etc.
    fn child_type(&self, idx: usize) -> LayoutChildType;

    /// Get the metadata for this layout as raw bytes.
    fn metadata(&self) -> Vec<u8>;

    /// Get the segment IDs for this layout.
    fn segment_ids(&self) -> Vec<SegmentId>;

    /// Construct a new reader for this layout.
    ///
    /// - `name` — human-readable label for this reader, propagated to child readers
    ///   (typically by appending a path component) and surfaced in tracing/debug output.
    /// - `segment_source` — source of segment bytes for this and any descendant readers
    ///   constructed from the returned reader; recursive callers should pass the same
    ///   source through.
    /// - `session` — the [`VortexSession`] hosting the encoding/scalar/layout registries
    ///   and execution context the reader needs at evaluation time.
    /// - `ctx` — id-keyed dependency registry threaded through reader construction (see
    ///   [`LayoutReaderContext`]). Top-level callers (file open, tests) typically pass
    ///   `&LayoutReaderContext::new()`; recursive callers inside layout implementations
    ///   must propagate the `ctx` they were handed so ancestor-published values reach
    ///   descendants.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
        ctx: &LayoutReaderContext,
    ) -> VortexResult<LayoutReaderRef>;
}
89
/// Conversion of a value, by consuming it, into a type-erased [`LayoutRef`].
pub trait IntoLayout {
    /// Converts this type into a [`LayoutRef`].
    fn into_layout(self) -> LayoutRef;
}
94
/// A type that allows us to identify how a layout child relates to its parent.
///
/// Each variant carries enough information to derive the child's display name
/// ([`LayoutChildType::name`]) and its relative row offset
/// ([`LayoutChildType::row_offset`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayoutChildType {
    /// A layout child that retains the same schema and row offset position in the dataset.
    /// Contains a human-readable name of the child.
    Transparent(Arc<str>),
    /// A layout child that provides auxiliary data, e.g. dictionary values, zone maps, etc.
    /// Contains a human-readable name of the child.
    Auxiliary(Arc<str>),
    /// A layout child that represents a row-based chunk of data.
    /// Contains the chunk index and relative row offset of the child, in that order.
    Chunk((usize, u64)),
    /// A layout child that represents a single field of data.
    /// Contains the field name of the child.
    Field(FieldName),
}
110
111impl LayoutChildType {
112    /// Returns the name of this child.
113    pub fn name(&self) -> Arc<str> {
114        match self {
115            LayoutChildType::Chunk((idx, _offset)) => format!("[{idx}]").into(),
116            LayoutChildType::Auxiliary(name) => Arc::clone(name),
117            LayoutChildType::Transparent(name) => Arc::clone(name),
118            LayoutChildType::Field(name) => name.clone().into(),
119        }
120    }
121
122    /// Returns the relative row offset of this child.
123    /// For auxiliary children, this is `None`.
124    pub fn row_offset(&self) -> Option<u64> {
125        match self {
126            LayoutChildType::Chunk((_idx, offset)) => Some(*offset),
127            LayoutChildType::Auxiliary(_) => None,
128            LayoutChildType::Transparent(_) => Some(0),
129            LayoutChildType::Field(_) => Some(0),
130        }
131    }
132}
133
134impl dyn Layout + '_ {
135    /// The ID of the encoding for this layout.
136    pub fn encoding_id(&self) -> LayoutEncodingId {
137        self.encoding().id()
138    }
139
140    /// The children of this layout.
141    pub fn children(&self) -> VortexResult<Vec<LayoutRef>> {
142        (0..self.nchildren()).map(|i| self.child(i)).try_collect()
143    }
144
145    /// The child types of this layout.
146    pub fn child_types(&self) -> impl Iterator<Item = LayoutChildType> {
147        (0..self.nchildren()).map(|i| self.child_type(i))
148    }
149
150    /// The names of the children of this layout.
151    pub fn child_names(&self) -> impl Iterator<Item = Arc<str>> {
152        self.child_types().map(|child| child.name())
153    }
154
155    /// The row offsets of the children of this layout, where `None` indicates an auxiliary child.
156    pub fn child_row_offsets(&self) -> impl Iterator<Item = Option<u64>> {
157        self.child_types().map(|child| child.row_offset())
158    }
159
160    pub fn is<V: VTable>(&self) -> bool {
161        self.as_opt::<V>().is_some()
162    }
163
164    /// Downcast a layout to a specific type.
165    pub fn as_<V: VTable>(&self) -> &V::Layout {
166        self.as_opt::<V>().vortex_expect("Failed to downcast")
167    }
168
169    /// Downcast a layout to a specific type.
170    pub fn as_opt<V: VTable>(&self) -> Option<&V::Layout> {
171        self.as_any()
172            .downcast_ref::<LayoutAdapter<V>>()
173            .map(|adapter| &adapter.0)
174    }
175
176    /// Downcast a layout to a specific type.
177    pub fn into<V: VTable>(self: Arc<Self>) -> Arc<V::Layout> {
178        let layout_adapter = self
179            .as_any_arc()
180            .downcast::<LayoutAdapter<V>>()
181            .map_err(|_| vortex_err!("Invalid layout type"))
182            .vortex_expect("Invalid layout type");
183
184        // SAFETY: LayoutAdapter<V> is #[repr(transparent)] (see line 192) which guarantees
185        // it has the same memory layout as V::Layout. The downcast above ensures we have
186        // the correct type. This transmute is safe because both Arc types point to data
187        // with identical layout and alignment.
188        unsafe { std::mem::transmute::<Arc<LayoutAdapter<V>>, Arc<V::Layout>>(layout_adapter) }
189    }
190
191    /// Depth-first traversal of the layout and its children.
192    pub fn depth_first_traversal(&self) -> impl Iterator<Item = VortexResult<LayoutRef>> {
193        /// A depth-first pre-order iterator over a layout.
194        struct ChildrenIterator {
195            stack: Vec<LayoutRef>,
196        }
197
198        impl Iterator for ChildrenIterator {
199            type Item = VortexResult<LayoutRef>;
200
201            fn next(&mut self) -> Option<Self::Item> {
202                let next = self.stack.pop()?;
203                let Ok(children) = next.children() else {
204                    return Some(Ok(next));
205                };
206                for child in children.into_iter().rev() {
207                    self.stack.push(child);
208                }
209                Some(Ok(next))
210            }
211        }
212
213        ChildrenIterator {
214            stack: vec![self.to_layout()],
215        }
216    }
217
218    /// Display the layout as a tree structure.
219    pub fn display_tree(&self) -> DisplayLayoutTree {
220        DisplayLayoutTree::new(self.to_layout(), false)
221    }
222
223    /// Display the layout as a tree structure with optional verbose metadata.
224    pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
225        DisplayLayoutTree::new(self.to_layout(), verbose)
226    }
227
228    /// Display the layout as a tree structure, fetching segment buffer sizes from the segment source.
229    ///
230    /// # Warning
231    ///
232    /// This function performs IO to fetch each segment's buffer. For layouts with
233    /// many segments, this may result in significant IO overhead.
234    pub async fn display_tree_with_segments(
235        &self,
236        segment_source: Arc<dyn SegmentSource>,
237    ) -> VortexResult<DisplayLayoutTree> {
238        display_tree_with_segment_sizes(self.to_layout(), segment_source).await
239    }
240}
241
242/// Display the encoding, dtype, row count, and segment IDs of this layout.
243impl Display for dyn Layout + '_ {
244    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
245        let segment_ids = self.segment_ids();
246        if segment_ids.is_empty() {
247            write!(
248                f,
249                "{}({}, rows={})",
250                self.encoding_id(),
251                self.dtype(),
252                self.row_count()
253            )
254        } else {
255            write!(
256                f,
257                "{}({}, rows={}, segments=[{}])",
258                self.encoding_id(),
259                self.dtype(),
260                self.row_count(),
261                segment_ids
262                    .iter()
263                    .map(|s| format!("{}", **s))
264                    .collect::<Vec<_>>()
265                    .join(", ")
266            )
267        }
268    }
269}
270
/// Adapter that wraps a vtable's `V::Layout` so it can be used as a [`Layout`] trait object.
///
/// The `#[repr(transparent)]` attribute is load-bearing: the unsafe by-value downcast
/// (`<dyn Layout>::into`) converts an `Arc<LayoutAdapter<V>>` into an `Arc<V::Layout>` and
/// relies on the two types having identical layout.
#[repr(transparent)]
pub struct LayoutAdapter<V: VTable>(V::Layout);
273
274impl<V: VTable> Debug for LayoutAdapter<V> {
275    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
276        self.0.fmt(f)
277    }
278}
279
/// Implements [`Layout`] for the adapter by forwarding each method to the corresponding
/// `VTable` function, passing the wrapped `V::Layout`.
impl<V: VTable> Layout for LayoutAdapter<V> {
    /// Exposes the adapter (not the inner layout) as `Any`; downcasts therefore target
    /// `LayoutAdapter<V>`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Exposes the shared adapter handle as `Arc<dyn Any + Send + Sync>`.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }

    /// Clones the wrapped layout into a new adapter behind a freshly allocated `Arc`.
    fn to_layout(&self) -> LayoutRef {
        Arc::new(LayoutAdapter::<V>(self.0.clone()))
    }

    /// Forwards to `V::encoding`.
    fn encoding(&self) -> LayoutEncodingRef {
        V::encoding(&self.0)
    }

    /// Forwards to `V::row_count`.
    fn row_count(&self) -> u64 {
        V::row_count(&self.0)
    }

    /// Forwards to `V::dtype`.
    fn dtype(&self) -> &DType {
        V::dtype(&self.0)
    }

    /// Forwards to `V::nchildren`.
    fn nchildren(&self) -> usize {
        V::nchildren(&self.0)
    }

    /// Forwards to `V::child`.
    fn child(&self, idx: usize) -> VortexResult<LayoutRef> {
        V::child(&self.0, idx)
    }

    /// Forwards to `V::child_type`.
    fn child_type(&self, idx: usize) -> LayoutChildType {
        V::child_type(&self.0, idx)
    }

    /// Obtains the metadata from `V::metadata` and serializes it to bytes.
    fn metadata(&self) -> Vec<u8> {
        V::metadata(&self.0).serialize()
    }

    /// Forwards to `V::segment_ids`.
    fn segment_ids(&self) -> Vec<SegmentId> {
        V::segment_ids(&self.0)
    }

    /// Forwards to `V::new_reader`, passing every argument through unchanged.
    fn new_reader(
        &self,
        name: Arc<str>,
        segment_source: Arc<dyn SegmentSource>,
        session: &VortexSession,
        ctx: &LayoutReaderContext,
    ) -> VortexResult<LayoutReaderRef> {
        V::new_reader(&self.0, name, segment_source, session, ctx)
    }
}
335
/// Private home of the [`Sealed`] marker trait that [`Layout`] requires as a supertrait.
mod private {
    use super::*;
    use crate::layouts::foreign::ForeignLayout;

    /// Marker trait with no methods; a type must implement it to be able to implement
    /// [`Layout`].
    pub trait Sealed {}

    // The types permitted to implement `Layout`: every vtable-backed adapter, plus
    // `ForeignLayout`.
    impl<V: VTable> Sealed for LayoutAdapter<V> {}
    impl Sealed for ForeignLayout {}
}
345
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_session::registry::ReadContext;

    use super::*;

    /// Every variant reports the expected display name.
    #[test]
    fn test_layout_child_type_name() {
        // Chunks are named after their bracketed index; the offset plays no part.
        assert_eq!(&*LayoutChildType::Chunk((5, 100)).name(), "[5]");

        // Fields are named after the field itself.
        let by_field = LayoutChildType::Field(FieldName::from("customer_id"));
        assert_eq!(&*by_field.name(), "customer_id");

        // Auxiliary and transparent children hand back the stored label.
        let auxiliary = LayoutChildType::Auxiliary(Arc::from("zone_map"));
        assert_eq!(&*auxiliary.name(), "zone_map");

        let passthrough = LayoutChildType::Transparent(Arc::from("compressed"));
        assert_eq!(&*passthrough.name(), "compressed");
    }

    /// Every variant reports the expected relative row offset.
    #[test]
    fn test_layout_child_type_row_offset() {
        // A chunk reports the offset it was built with.
        assert_eq!(LayoutChildType::Chunk((0, 42)).row_offset(), Some(42));

        // A field starts at row zero.
        let by_field = LayoutChildType::Field(FieldName::from("field1"));
        assert_eq!(by_field.row_offset(), Some(0));

        // An auxiliary child has no row offset at all.
        let auxiliary = LayoutChildType::Auxiliary(Arc::from("metadata"));
        assert_eq!(auxiliary.row_offset(), None);

        // A transparent child starts at row zero.
        let passthrough = LayoutChildType::Transparent(Arc::from("wrapper"));
        assert_eq!(passthrough.row_offset(), Some(0));
    }

    /// Equality holds within a variant only when the payloads match, and never across variants.
    #[test]
    fn test_layout_child_type_equality() {
        // Chunks compare on both the index and the offset.
        let base_chunk = LayoutChildType::Chunk((1, 100));
        assert_eq!(base_chunk, LayoutChildType::Chunk((1, 100)));
        assert_ne!(base_chunk, LayoutChildType::Chunk((2, 100)));
        assert_ne!(base_chunk, LayoutChildType::Chunk((1, 200)));

        // Fields compare on the field name.
        let base_field = LayoutChildType::Field(FieldName::from("name"));
        assert_eq!(base_field, LayoutChildType::Field(FieldName::from("name")));
        assert_ne!(base_field, LayoutChildType::Field(FieldName::from("age")));

        // Auxiliary children compare on the label.
        let base_aux = LayoutChildType::Auxiliary(Arc::from("stats"));
        assert_eq!(base_aux, LayoutChildType::Auxiliary(Arc::from("stats")));
        assert_ne!(base_aux, LayoutChildType::Auxiliary(Arc::from("index")));

        // Transparent children compare on the label.
        let base_transparent = LayoutChildType::Transparent(Arc::from("enc"));
        assert_eq!(
            base_transparent,
            LayoutChildType::Transparent(Arc::from("enc"))
        );
        assert_ne!(
            base_transparent,
            LayoutChildType::Transparent(Arc::from("dec"))
        );

        // Different variants are never equal.
        assert_ne!(base_chunk, base_field);
        assert_ne!(base_field, base_aux);
        assert_ne!(base_aux, base_transparent);
    }

    /// Table-driven check of `name` and `row_offset` together.
    #[rstest]
    #[case(LayoutChildType::Chunk((0, 0)), "[0]", Some(0))]
    #[case(LayoutChildType::Chunk((999, 1000000)), "[999]", Some(1000000))]
    #[case(LayoutChildType::Field(FieldName::from("")), "", Some(0))]
    #[case(
        LayoutChildType::Field(FieldName::from("very_long_field_name_that_is_quite_lengthy")),
        "very_long_field_name_that_is_quite_lengthy",
        Some(0)
    )]
    #[case(LayoutChildType::Auxiliary(Arc::from("aux")), "aux", None)]
    #[case(LayoutChildType::Transparent(Arc::from("t")), "t", Some(0))]
    fn test_layout_child_type_parameterized(
        #[case] child_type: LayoutChildType,
        #[case] expected_name: &str,
        #[case] expected_offset: Option<u64>,
    ) {
        let actual_name = child_type.name();
        assert_eq!(&*actual_name, expected_name);

        let actual_offset = child_type.row_offset();
        assert_eq!(actual_offset, expected_offset);
    }

    /// Chunk names are always the bracketed index, and the offset round-trips.
    #[test]
    fn test_chunk_with_different_indices_and_offsets() {
        for (idx, offset) in [(0usize, 0u64), (1, 100), (2, 200), (100, 10000)] {
            let chunk = LayoutChildType::Chunk((idx, offset));
            let name = chunk.name();

            assert!(name.starts_with('['));
            assert!(name.ends_with(']'));
            assert_eq!(&*name, format!("[{idx}]"));
            assert_eq!(chunk.row_offset(), Some(offset));
        }
    }

    /// Field names containing punctuation are passed through untouched.
    #[test]
    fn test_field_names_with_special_characters() {
        let raw_names = [
            "field-with-dashes",
            "field_with_underscores",
            "field.with.dots",
            "field::with::colons",
            "field/with/slashes",
            "field@with#symbols",
        ];

        for raw in raw_names {
            let expected: Arc<str> = Arc::from(raw);
            let field = LayoutChildType::Field(Arc::clone(&expected).into());
            assert_eq!(field.name(), expected);
            assert_eq!(field.row_offset(), Some(0));
        }
    }

    /// The one-line `Display` output of flat, dict, chunked and struct layouts.
    #[test]
    fn test_struct_layout_display() {
        use vortex_array::dtype::Nullability::NonNullable;
        use vortex_array::dtype::PType;
        use vortex_array::dtype::StructFields;

        use crate::IntoLayout;
        use crate::layouts::chunked::ChunkedLayout;
        use crate::layouts::dict::DictLayout;
        use crate::layouts::flat::FlatLayout;
        use crate::layouts::struct_::StructLayout;
        use crate::segments::SegmentId;

        let ctx = ReadContext::new([]);

        // Dictionary values: a flat layout of utf8 strings; it owns a segment, so one is shown.
        let values_layout =
            FlatLayout::new(3, DType::Utf8(NonNullable), SegmentId::from(0), ctx.clone())
                .into_layout();
        assert_eq!(
            values_layout.to_string(),
            "vortex.flat(utf8, rows=3, segments=[0])"
        );

        // Dictionary codes: a flat layout of u16; again a segment is shown.
        let codes_layout = FlatLayout::new(
            10,
            DType::Primitive(PType::U16, NonNullable),
            SegmentId::from(1),
            ctx.clone(),
        )
        .into_layout();
        assert_eq!(
            codes_layout.to_string(),
            "vortex.flat(u16, rows=10, segments=[1])"
        );

        // Column "name": the dict layout has no segments of its own.
        let name_column =
            DictLayout::new(Arc::clone(&values_layout), Arc::clone(&codes_layout)).into_layout();
        assert_eq!(name_column.to_string(), "vortex.dict(utf8, rows=10)");

        // Two flat i64 chunks backing the "value" column.
        let first_chunk = FlatLayout::new(
            5,
            DType::Primitive(PType::I64, NonNullable),
            SegmentId::from(2),
            ctx.clone(),
        )
        .into_layout();

        let second_chunk = FlatLayout::new(
            5,
            DType::Primitive(PType::I64, NonNullable),
            SegmentId::from(3),
            ctx,
        )
        .into_layout();

        // Column "value": the chunked layout has no segments of its own.
        let value_column = ChunkedLayout::new(
            10,
            DType::Primitive(PType::I64, NonNullable),
            crate::OwnedLayoutChildren::layout_children(vec![
                Arc::clone(&first_chunk),
                Arc::clone(&second_chunk),
            ]),
        )
        .into_layout();
        assert_eq!(value_column.to_string(), "vortex.chunked(i64, rows=10)");

        // The individual chunks do show their segments.
        assert_eq!(
            first_chunk.to_string(),
            "vortex.flat(i64, rows=5, segments=[2])"
        );
        assert_eq!(
            second_chunk.to_string(),
            "vortex.flat(i64, rows=5, segments=[3])"
        );

        // Struct layout over the two columns.
        let field_names: Vec<Arc<str>> = vec!["name".into(), "value".into()];
        let field_dtypes = vec![
            DType::Utf8(NonNullable),
            DType::Primitive(PType::I64, NonNullable),
        ];
        let struct_dtype = DType::Struct(
            StructFields::new(field_names.into(), field_dtypes),
            NonNullable,
        );

        let struct_layout =
            StructLayout::new(10, struct_dtype, vec![name_column, value_column]).into_layout();

        println!("{}", struct_layout.display_tree_verbose(true));

        // The struct layout has no segments of its own.
        assert_eq!(
            struct_layout.to_string(),
            "vortex.struct({name=utf8, value=i64}, rows=10)"
        );
    }
}