// vortex_layout/layouts/chunked/mod.rs

mod reader;
pub mod writer;

use std::collections::BTreeSet;
use std::sync::Arc;

use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
use vortex_dtype::{DType, FieldMask};
use vortex_error::{VortexResult, vortex_bail};

use crate::children::LayoutChildren;
use crate::layouts::chunked::reader::ChunkedReader;
use crate::segments::{SegmentId, SegmentSource};
use crate::{
    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
};
17
18vtable!(Chunked);
19
20impl VTable for ChunkedVTable {
21    type Layout = ChunkedLayout;
22    type Encoding = ChunkedLayoutEncoding;
23    type Metadata = EmptyMetadata;
24
25    fn id(_encoding: &Self::Encoding) -> LayoutId {
26        LayoutId::new_ref("vortex.chunked")
27    }
28
29    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
30        LayoutEncodingRef::new_ref(ChunkedLayoutEncoding.as_ref())
31    }
32
33    fn row_count(layout: &Self::Layout) -> u64 {
34        layout.row_count
35    }
36
37    fn dtype(layout: &Self::Layout) -> &DType {
38        &layout.dtype
39    }
40
41    fn metadata(_layout: &Self::Layout) -> Self::Metadata {
42        EmptyMetadata
43    }
44
45    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
46        vec![]
47    }
48
49    fn nchildren(layout: &Self::Layout) -> usize {
50        layout.children.nchildren()
51    }
52
53    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
54        layout.children.child(idx, &layout.dtype)
55    }
56
57    fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
58        LayoutChildType::Chunk((idx, layout.chunk_offsets[idx]))
59    }
60
61    fn register_splits(
62        layout: &Self::Layout,
63        field_mask: &[FieldMask],
64        row_offset: u64,
65        splits: &mut BTreeSet<u64>,
66    ) -> VortexResult<()> {
67        let mut offset = row_offset;
68        for i in 0..layout.nchildren() {
69            let child = layout.child(i)?;
70            child.register_splits(field_mask, offset, splits)?;
71            offset += child.row_count();
72            splits.insert(offset);
73        }
74        Ok(())
75    }
76
77    fn new_reader(
78        layout: &Self::Layout,
79        name: &Arc<str>,
80        segment_source: &Arc<dyn SegmentSource>,
81        ctx: &ArrayContext,
82    ) -> VortexResult<LayoutReaderRef> {
83        Ok(Arc::new(ChunkedReader::new(
84            layout.clone(),
85            name.clone(),
86            segment_source.clone(),
87            ctx.clone(),
88        )))
89    }
90
91    fn build(
92        _encoding: &Self::Encoding,
93        dtype: &DType,
94        row_count: u64,
95        _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96        _segment_ids: Vec<SegmentId>,
97        children: &dyn LayoutChildren,
98    ) -> VortexResult<Self::Layout> {
99        Ok(ChunkedLayout::new(
100            row_count,
101            dtype.clone(),
102            children.to_arc(),
103        ))
104    }
105}
106
/// Stateless marker type identifying the chunked layout encoding (`"vortex.chunked"`).
#[derive(Debug)]
pub struct ChunkedLayoutEncoding;
109
110#[derive(Clone, Debug)]
111pub struct ChunkedLayout {
112    row_count: u64,
113    dtype: DType,
114    children: Arc<dyn LayoutChildren>,
115    chunk_offsets: Vec<u64>,
116}
117
118impl ChunkedLayout {
119    pub fn new(row_count: u64, dtype: DType, children: Arc<dyn LayoutChildren>) -> Self {
120        let mut chunk_offsets = Vec::with_capacity(children.nchildren() + 1);
121
122        chunk_offsets.push(0);
123        for i in 0..children.nchildren() {
124            chunk_offsets.push(chunk_offsets[i] + children.child_row_count(i));
125        }
126        assert_eq!(
127            chunk_offsets[children.nchildren()],
128            row_count,
129            "Row count mismatch"
130        );
131        Self {
132            row_count,
133            dtype,
134            children,
135            chunk_offsets,
136        }
137    }
138}