Skip to main content

vortex_layout/layouts/chunked/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5pub mod writer;
6
7use std::sync::Arc;
8
9use vortex_array::DeserializeMetadata;
10use vortex_array::EmptyMetadata;
11use vortex_array::dtype::DType;
12use vortex_error::VortexResult;
13use vortex_session::VortexSession;
14use vortex_session::registry::ReadContext;
15
16use crate::LayoutChildType;
17use crate::LayoutEncodingRef;
18use crate::LayoutId;
19use crate::LayoutReaderContext;
20use crate::LayoutReaderRef;
21use crate::LayoutRef;
22use crate::VTable;
23use crate::children::LayoutChildren;
24use crate::children::OwnedLayoutChildren;
25use crate::layouts::chunked::reader::ChunkedReader;
26use crate::segments::SegmentId;
27use crate::segments::SegmentSource;
28use crate::vtable;
29
// Invokes the crate's `vtable!` macro for `Chunked`.
// NOTE(review): `Chunked` is not declared anywhere else in the visible part of this file, so the
// macro presumably generates that type (and related glue) — confirm against `crate::vtable`.
vtable!(Chunked);
31
32impl VTable for Chunked {
33    type Layout = ChunkedLayout;
34    type Encoding = ChunkedLayoutEncoding;
35    type Metadata = EmptyMetadata;
36
37    fn id(_encoding: &Self::Encoding) -> LayoutId {
38        LayoutId::new("vortex.chunked")
39    }
40
41    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
42        LayoutEncodingRef::new_ref(ChunkedLayoutEncoding.as_ref())
43    }
44
45    fn row_count(layout: &Self::Layout) -> u64 {
46        layout.row_count
47    }
48
49    fn dtype(layout: &Self::Layout) -> &DType {
50        &layout.dtype
51    }
52
53    fn metadata(_layout: &Self::Layout) -> Self::Metadata {
54        EmptyMetadata
55    }
56
57    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
58        vec![]
59    }
60
61    fn nchildren(layout: &Self::Layout) -> usize {
62        layout.children.nchildren()
63    }
64
65    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
66        layout.children.child(idx, Self::dtype(layout))
67    }
68
69    fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
70        LayoutChildType::Chunk((idx, layout.chunk_offsets[idx]))
71    }
72
73    fn new_reader(
74        layout: &Self::Layout,
75        name: Arc<str>,
76        segment_source: Arc<dyn SegmentSource>,
77        session: &VortexSession,
78        ctx: &LayoutReaderContext,
79    ) -> VortexResult<LayoutReaderRef> {
80        Ok(Arc::new(ChunkedReader::new(
81            layout.clone(),
82            name,
83            segment_source,
84            session,
85            ctx.clone(),
86        )))
87    }
88
89    fn build(
90        _encoding: &Self::Encoding,
91        dtype: &DType,
92        row_count: u64,
93        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
94        _segment_ids: Vec<SegmentId>,
95        children: &dyn LayoutChildren,
96        _ctx: &ReadContext,
97    ) -> VortexResult<Self::Layout> {
98        Ok(ChunkedLayout::new(
99            row_count,
100            dtype.clone(),
101            children.to_arc(),
102        ))
103    }
104
105    fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
106        let new_children = OwnedLayoutChildren::layout_children(children);
107
108        // Recalculate chunk offsets based on new children
109        let mut chunk_offsets = vec![0; new_children.nchildren() + 1];
110        for i in 0..new_children.nchildren() {
111            chunk_offsets[i + 1] = chunk_offsets[i] + new_children.child_row_count(i);
112        }
113
114        layout.children = new_children;
115        layout.chunk_offsets = chunk_offsets;
116        Ok(())
117    }
118}
119
/// Encoding type for chunked layouts.
///
/// Used as the `VTable::Encoding` of `Chunked`, whose layout id is `vortex.chunked`.
#[derive(Debug)]
pub struct ChunkedLayoutEncoding;
122
123/// Partitions a column into row-based chunks so that each chunk can be read independently.
124///
125/// Used to break large columns into smaller pieces for parallel I/O and to limit memory
126/// usage when scanning.
127#[derive(Clone, Debug)]
128pub struct ChunkedLayout {
129    row_count: u64,
130    dtype: DType,
131    children: Arc<dyn LayoutChildren>,
132    chunk_offsets: Vec<u64>,
133}
134
135impl ChunkedLayout {
136    pub fn new(row_count: u64, dtype: DType, children: Arc<dyn LayoutChildren>) -> Self {
137        let mut chunk_offsets = vec![0; children.nchildren() + 1];
138        for i in 0..children.nchildren() {
139            chunk_offsets[i + 1] = chunk_offsets[i] + children.child_row_count(i);
140        }
141
142        assert_eq!(
143            chunk_offsets[children.nchildren()],
144            row_count,
145            "Row count mismatch"
146        );
147        Self {
148            row_count,
149            dtype,
150            children,
151            chunk_offsets,
152        }
153    }
154
155    pub fn children(&self) -> &Arc<dyn LayoutChildren> {
156        &self.children
157    }
158}