vortex_layout/layouts/chunked/
mod.rs1mod reader;
2pub mod writer;
3
4use std::collections::BTreeSet;
5use std::sync::Arc;
6
7use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
8use vortex_dtype::{DType, FieldMask};
9use vortex_error::VortexResult;
10
11use crate::children::LayoutChildren;
12use crate::layouts::chunked::reader::ChunkedReader;
13use crate::segments::{SegmentId, SegmentSource};
14use crate::{
15 LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
16};
17
18vtable!(Chunked);
19
20impl VTable for ChunkedVTable {
21 type Layout = ChunkedLayout;
22 type Encoding = ChunkedLayoutEncoding;
23 type Metadata = EmptyMetadata;
24
25 fn id(_encoding: &Self::Encoding) -> LayoutId {
26 LayoutId::new_ref("vortex.chunked")
27 }
28
29 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
30 LayoutEncodingRef::new_ref(ChunkedLayoutEncoding.as_ref())
31 }
32
33 fn row_count(layout: &Self::Layout) -> u64 {
34 layout.row_count
35 }
36
37 fn dtype(layout: &Self::Layout) -> &DType {
38 &layout.dtype
39 }
40
41 fn metadata(_layout: &Self::Layout) -> Self::Metadata {
42 EmptyMetadata
43 }
44
45 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
46 vec![]
47 }
48
49 fn nchildren(layout: &Self::Layout) -> usize {
50 layout.children.nchildren()
51 }
52
53 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
54 layout.children.child(idx, &layout.dtype)
55 }
56
57 fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
58 LayoutChildType::Chunk((idx, layout.chunk_offsets[idx]))
59 }
60
61 fn register_splits(
62 layout: &Self::Layout,
63 field_mask: &[FieldMask],
64 row_offset: u64,
65 splits: &mut BTreeSet<u64>,
66 ) -> VortexResult<()> {
67 let mut offset = row_offset;
68 for i in 0..layout.nchildren() {
69 let child = layout.child(i)?;
70 child.register_splits(field_mask, offset, splits)?;
71 offset += child.row_count();
72 splits.insert(offset);
73 }
74 Ok(())
75 }
76
77 fn new_reader(
78 layout: &Self::Layout,
79 name: &Arc<str>,
80 segment_source: &Arc<dyn SegmentSource>,
81 ctx: &ArrayContext,
82 ) -> VortexResult<LayoutReaderRef> {
83 Ok(Arc::new(ChunkedReader::new(
84 layout.clone(),
85 name.clone(),
86 segment_source.clone(),
87 ctx.clone(),
88 )))
89 }
90
91 fn build(
92 _encoding: &Self::Encoding,
93 dtype: &DType,
94 row_count: u64,
95 _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96 _segment_ids: Vec<SegmentId>,
97 children: &dyn LayoutChildren,
98 ) -> VortexResult<Self::Layout> {
99 Ok(ChunkedLayout::new(
100 row_count,
101 dtype.clone(),
102 children.to_arc(),
103 ))
104 }
105}
106
/// Marker type for the chunked layout encoding.
///
/// It has no fields; it serves as the `Encoding` associated type of the
/// `VTable` implementation in this module.
#[derive(Debug)]
pub struct ChunkedLayoutEncoding;
109
110#[derive(Clone, Debug)]
111pub struct ChunkedLayout {
112 row_count: u64,
113 dtype: DType,
114 children: Arc<dyn LayoutChildren>,
115 chunk_offsets: Vec<u64>,
116}
117
118impl ChunkedLayout {
119 pub fn new(row_count: u64, dtype: DType, children: Arc<dyn LayoutChildren>) -> Self {
120 let mut chunk_offsets = Vec::with_capacity(children.nchildren() + 1);
121
122 chunk_offsets.push(0);
123 for i in 0..children.nchildren() {
124 chunk_offsets.push(chunk_offsets[i] + children.child_row_count(i));
125 }
126 assert_eq!(
127 chunk_offsets[children.nchildren()],
128 row_count,
129 "Row count mismatch"
130 );
131 Self {
132 row_count,
133 dtype,
134 children,
135 chunk_offsets,
136 }
137 }
138}