vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13use vortex_array::ArrayContext;
14use vortex_array::DeserializeMetadata;
15use vortex_array::SerializeMetadata;
16use vortex_array::expr::stats::Stat;
17use vortex_array::stats::as_stat_bitset_bytes;
18use vortex_array::stats::stats_from_bitset_bytes;
19use vortex_dtype::DType;
20use vortex_dtype::TryFromBytes;
21use vortex_error::VortexExpect;
22use vortex_error::VortexResult;
23use vortex_error::vortex_bail;
24use vortex_error::vortex_panic;
25use vortex_session::VortexSession;
26
27use crate::LayoutChildType;
28use crate::LayoutEncodingRef;
29use crate::LayoutId;
30use crate::LayoutReaderRef;
31use crate::LayoutRef;
32use crate::VTable;
33use crate::children::LayoutChildren;
34use crate::children::OwnedLayoutChildren;
35use crate::layouts::zoned::reader::ZonedReader;
36use crate::layouts::zoned::zone_map::ZoneMap;
37use crate::segments::SegmentId;
38use crate::segments::SegmentSource;
39use crate::vtable;
40
41vtable!(Zoned);
42
43impl VTable for ZonedVTable {
44 type Layout = ZonedLayout;
45 type Encoding = ZonedLayoutEncoding;
46 type Metadata = ZonedMetadata;
47
48 fn id(_encoding: &Self::Encoding) -> LayoutId {
49 LayoutId::new_ref("vortex.stats") }
51
52 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
53 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
54 }
55
56 fn row_count(layout: &Self::Layout) -> u64 {
57 layout.children.child_row_count(0)
58 }
59
60 fn dtype(layout: &Self::Layout) -> &DType {
61 &layout.dtype
62 }
63
64 fn metadata(layout: &Self::Layout) -> Self::Metadata {
65 ZonedMetadata {
66 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
67 present_stats: layout.present_stats.clone(),
68 }
69 }
70
71 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
72 vec![]
73 }
74
75 fn nchildren(_layout: &Self::Layout) -> usize {
76 2
77 }
78
79 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
80 match idx {
81 0 => layout.children.child(0, layout.dtype()),
82 1 => layout.children.child(
83 1,
84 &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
85 ),
86 _ => vortex_bail!("Invalid child index: {}", idx),
87 }
88 }
89
90 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
91 match idx {
92 0 => LayoutChildType::Transparent("data".into()),
93 1 => LayoutChildType::Auxiliary("zones".into()),
94 _ => vortex_panic!("Invalid child index: {}", idx),
95 }
96 }
97
98 fn new_reader(
99 layout: &Self::Layout,
100 name: Arc<str>,
101 segment_source: Arc<dyn SegmentSource>,
102 session: &VortexSession,
103 ) -> VortexResult<LayoutReaderRef> {
104 Ok(Arc::new(ZonedReader::try_new(
105 layout.clone(),
106 name,
107 segment_source,
108 session.clone(),
109 )?))
110 }
111
112 fn build(
113 _encoding: &Self::Encoding,
114 dtype: &DType,
115 _row_count: u64,
116 metadata: &ZonedMetadata,
117 _segment_ids: Vec<SegmentId>,
118 children: &dyn LayoutChildren,
119 _ctx: &ArrayContext,
120 ) -> VortexResult<Self::Layout> {
121 Ok(ZonedLayout {
122 dtype: dtype.clone(),
123 children: children.to_arc(),
124 zone_len: metadata.zone_len as usize,
125 present_stats: metadata.present_stats.clone(),
126 })
127 }
128
129 fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
130 if children.len() != 2 {
131 vortex_bail!(
132 "ZonedLayout expects exactly 2 children (data, zones), got {}",
133 children.len()
134 );
135 }
136 layout.children = OwnedLayoutChildren::layout_children(children);
137 Ok(())
138 }
139}
140
/// Stateless marker type naming the encoding of a [`ZonedLayout`].
///
/// It is the `Encoding` associated type of the `VTable` implementation in this module.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
143
144#[derive(Clone, Debug)]
145pub struct ZonedLayout {
146 dtype: DType,
147 children: Arc<dyn LayoutChildren>,
148 zone_len: usize,
149 present_stats: Arc<[Stat]>,
150}
151
152impl ZonedLayout {
153 pub fn new(
154 data: LayoutRef,
155 zones: LayoutRef,
156 zone_len: usize,
157 present_stats: Arc<[Stat]>,
158 ) -> Self {
159 if zone_len == 0 {
160 vortex_panic!("Zone length must be greater than 0");
161 }
162 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
163 if zones.dtype() != &expected_dtype {
164 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
165 }
166 Self {
167 dtype: data.dtype().clone(),
168 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
169 zone_len,
170 present_stats,
171 }
172 }
173
174 pub fn nzones(&self) -> usize {
175 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
176 }
177
178 pub fn present_stats(&self) -> &Arc<[Stat]> {
180 &self.present_stats
181 }
182}
183
184#[derive(Debug, PartialEq, Eq, Clone)]
185pub struct ZonedMetadata {
186 pub(super) zone_len: u32,
187 pub(super) present_stats: Arc<[Stat]>,
188}
189
190impl DeserializeMetadata for ZonedMetadata {
191 type Output = Self;
192
193 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
194 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
195 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
196 Ok(Self {
197 zone_len,
198 present_stats,
199 })
200 }
201}
202
203impl SerializeMetadata for ZonedMetadata {
204 fn serialize(self) -> Vec<u8> {
205 let mut metadata = vec![];
206 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
208 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
210 metadata
211 }
212}
213
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing must give back an equal `ZonedMetadata`.
    ///
    /// Every case lists its stats in the order deserialization is expected to produce,
    /// so plain equality is a valid check.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same count, different sequence.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = original.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(
            decoded.present_stats.len(),
            original.present_stats.len()
        );
        assert_ne!(decoded.present_stats, original.present_stats);
    }
}