vortex_layout/layouts/zoned/
mod.rs1mod builder;
15mod pruning;
16mod reader;
17mod schema;
18pub mod writer;
19pub mod zone_map;
20
21use std::sync::Arc;
22
23pub(crate) use builder::StatsAccumulator;
24pub use schema::MAX_IS_TRUNCATED;
25pub use schema::MIN_IS_TRUNCATED;
26use vortex_array::DeserializeMetadata;
27use vortex_array::SerializeMetadata;
28use vortex_array::dtype::DType;
29use vortex_array::dtype::TryFromBytes;
30use vortex_array::expr::stats::Stat;
31use vortex_array::stats::as_stat_bitset_bytes;
32use vortex_array::stats::stats_from_bitset_bytes;
33use vortex_error::VortexExpect;
34use vortex_error::VortexResult;
35use vortex_error::vortex_bail;
36use vortex_error::vortex_ensure;
37use vortex_error::vortex_ensure_eq;
38use vortex_error::vortex_panic;
39use vortex_session::VortexSession;
40use vortex_session::registry::ReadContext;
41
42use crate::LayoutChildType;
43use crate::LayoutEncodingRef;
44use crate::LayoutId;
45use crate::LayoutReaderRef;
46use crate::LayoutRef;
47use crate::VTable;
48use crate::children::LayoutChildren;
49use crate::children::OwnedLayoutChildren;
50use crate::layouts::zoned::reader::ZonedReader;
51use crate::layouts::zoned::schema::stats_table_dtype;
52use crate::segments::SegmentId;
53use crate::segments::SegmentSource;
54use crate::vtable;
55
56vtable!(Zoned);
57
58impl VTable for Zoned {
59 type Layout = ZonedLayout;
60 type Encoding = ZonedLayoutEncoding;
61 type Metadata = ZonedMetadata;
62
63 fn id(_encoding: &Self::Encoding) -> LayoutId {
64 LayoutId::new("vortex.stats")
66 }
67
68 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
69 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
70 }
71
72 fn row_count(layout: &Self::Layout) -> u64 {
73 layout.children.child_row_count(0)
74 }
75
76 fn dtype(layout: &Self::Layout) -> &DType {
77 &layout.dtype
78 }
79
80 fn metadata(layout: &Self::Layout) -> Self::Metadata {
81 ZonedMetadata {
82 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
83 present_stats: Arc::clone(&layout.present_stats),
84 }
85 }
86
87 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
88 vec![]
89 }
90
91 fn nchildren(_layout: &Self::Layout) -> usize {
92 2
93 }
94
95 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
96 match idx {
97 0 => layout.children.child(0, layout.dtype()),
98 1 => layout
99 .children
100 .child(1, &stats_table_dtype(layout.dtype(), &layout.present_stats)),
101 _ => vortex_bail!("Invalid child index: {}", idx),
102 }
103 }
104
105 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
106 match idx {
107 0 => LayoutChildType::Transparent("data".into()),
108 1 => LayoutChildType::Auxiliary("zones".into()),
109 _ => vortex_panic!("Invalid child index: {}", idx),
110 }
111 }
112
113 fn new_reader(
114 layout: &Self::Layout,
115 name: Arc<str>,
116 segment_source: Arc<dyn SegmentSource>,
117 session: &VortexSession,
118 ) -> VortexResult<LayoutReaderRef> {
119 Ok(Arc::new(ZonedReader::try_new(
120 layout.clone(),
121 name,
122 segment_source,
123 session.clone(),
124 )?))
125 }
126
127 fn build(
128 _encoding: &Self::Encoding,
129 dtype: &DType,
130 _row_count: u64,
131 metadata: &ZonedMetadata,
132 _segment_ids: Vec<SegmentId>,
133 children: &dyn LayoutChildren,
134 _ctx: &ReadContext,
135 ) -> VortexResult<Self::Layout> {
136 vortex_ensure_eq!(
137 children.nchildren(),
138 2,
139 "ZonedLayout expects exactly 2 children (data, zones)"
140 );
141 Ok(ZonedLayout {
142 dtype: dtype.clone(),
143 children: children.to_arc(),
144 zone_len: metadata.zone_len as usize,
145 present_stats: Arc::clone(&metadata.present_stats),
146 })
147 }
148
149 fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
150 if children.len() != 2 {
151 vortex_bail!(
152 "ZonedLayout expects exactly 2 children (data, zones), got {}",
153 children.len()
154 );
155 }
156 layout.children = OwnedLayoutChildren::layout_children(children);
157 Ok(())
158 }
159}
160
/// Encoding type for the zoned layout; a field-less marker used as `VTable::Encoding` for `Zoned`.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
164
165#[derive(Clone, Debug)]
171pub struct ZonedLayout {
172 dtype: DType,
173 children: Arc<dyn LayoutChildren>,
174 zone_len: usize,
175 present_stats: Arc<[Stat]>,
176}
177
178impl ZonedLayout {
179 pub fn new(
180 data: LayoutRef,
181 zones: LayoutRef,
182 zone_len: usize,
183 present_stats: Arc<[Stat]>,
184 ) -> Self {
185 if zone_len == 0 {
186 vortex_panic!("Zone length must be greater than 0");
187 }
188 let expected_dtype = stats_table_dtype(data.dtype(), &present_stats);
189 if zones.dtype() != &expected_dtype {
190 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
191 }
192 Self {
193 dtype: data.dtype().clone(),
194 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
195 zone_len,
196 present_stats,
197 }
198 }
199
200 pub fn nzones(&self) -> usize {
201 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
202 }
203
204 pub fn zone_len(&self) -> usize {
205 self.zone_len
206 }
207
208 pub fn present_stats(&self) -> &Arc<[Stat]> {
210 &self.present_stats
211 }
212}
213
214#[derive(Debug, PartialEq, Eq, Clone)]
219pub struct ZonedMetadata {
220 pub(super) zone_len: u32,
221 pub(super) present_stats: Arc<[Stat]>,
222}
223
224impl DeserializeMetadata for ZonedMetadata {
225 type Output = Self;
226
227 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
228 vortex_ensure!(
229 metadata.len() >= 4,
230 "Zoned metadata must contain at least 4 bytes for zone length, got {}",
231 metadata.len()
232 );
233
234 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
238 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
239
240 Ok(Self {
241 zone_len,
242 present_stats,
243 })
244 }
245}
246
247impl SerializeMetadata for ZonedMetadata {
248 fn serialize(self) -> Vec<u8> {
249 let mut metadata = vec![];
250 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
252 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
254 metadata
255 }
256}
257
#[cfg(test)]
mod tests {
    use std::panic;

    use rstest::rstest;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_session::registry::ReadContext;

    use super::*;
    use crate::IntoLayout;
    use crate::children::OwnedLayoutChildren;
    use crate::layouts::flat::FlatLayout;
    use crate::segments::SegmentId;

    /// Metadata whose stats are already in sorted order must survive a serialize/deserialize
    /// round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsConstant, Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = original.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(round_tripped.present_stats.is_sorted());
        assert_eq!(
            round_tripped.present_stats.len(),
            original.present_stats.len()
        );
        assert_ne!(round_tripped.present_stats, original.present_stats);
    }

    /// Fewer than 4 bytes cannot hold the zone length and must be rejected.
    #[rstest]
    #[case(vec![])]
    #[case(vec![0])]
    #[case(vec![0, 0])]
    #[case(vec![0, 0, 0])]
    fn test_deserialize_short_metadata_errors(#[case] metadata: Vec<u8>) {
        let outcome = ZonedMetadata::deserialize(&metadata);
        assert!(outcome.is_err());
    }

    /// Too-short input must surface as an `Err`, never as a panic.
    #[test]
    fn test_deserialize_short_metadata_returns_error_not_panic() {
        let caught = panic::catch_unwind(|| ZonedMetadata::deserialize(&[]));
        assert!(
            caught.is_ok(),
            "deserialize should return an error, not panic"
        );
        assert!(caught.unwrap().is_err());
    }

    /// A zero zone length with no stat bytes deserializes successfully.
    #[test]
    fn test_deserialize_zero_zone_len_is_allowed_for_backcompat() {
        let bytes = 0u32.to_le_bytes();
        let parsed = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(parsed.zone_len, 0);
        assert!(parsed.present_stats.is_empty());
    }

    /// `build` accepts metadata with a zero zone length, unlike `ZonedLayout::new`.
    #[test]
    fn test_build_allows_zero_zone_len_for_backcompat() -> VortexResult<()> {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let read_ctx = ReadContext::new([]);

        let data_child =
            FlatLayout::new(0, dtype.clone(), SegmentId::from(0), read_ctx.clone()).into_layout();
        let zones_child = FlatLayout::new(
            0,
            stats_table_dtype(&dtype, &[]),
            SegmentId::from(1),
            read_ctx,
        )
        .into_layout();
        let children = OwnedLayoutChildren::layout_children(vec![data_child, zones_child]);

        let zero_len_metadata = ZonedMetadata {
            zone_len: 0,
            present_stats: Arc::new([]),
        };
        let layout = <Zoned as VTable>::build(
            &ZonedLayoutEncoding,
            &dtype,
            0,
            &zero_len_metadata,
            vec![],
            children.as_ref(),
            &ReadContext::new([]),
        )?;

        assert_eq!(layout.zone_len, 0);
        Ok(())
    }

    /// `build` must fail when the number of children is not exactly two.
    #[test]
    fn test_build_rejects_invalid_child_count() {
        let metadata = ZonedMetadata {
            zone_len: 3,
            present_stats: Arc::new([]),
        };
        let no_children = OwnedLayoutChildren::layout_children(vec![]);
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let outcome = <Zoned as VTable>::build(
            &ZonedLayoutEncoding,
            &dtype,
            0,
            &metadata,
            vec![],
            no_children.as_ref(),
            &ReadContext::new([]),
        );

        assert!(outcome.is_err());
    }
}