vortex_layout/layouts/zoned/
mod.rs1mod builder;
15mod pruning;
16mod reader;
17mod schema;
18pub mod writer;
19pub mod zone_map;
20
21use std::sync::Arc;
22
23pub(crate) use builder::StatsAccumulator;
24pub use schema::MAX_IS_TRUNCATED;
25pub use schema::MIN_IS_TRUNCATED;
26use vortex_array::DeserializeMetadata;
27use vortex_array::SerializeMetadata;
28use vortex_array::dtype::DType;
29use vortex_array::dtype::TryFromBytes;
30use vortex_array::expr::stats::Stat;
31use vortex_array::stats::as_stat_bitset_bytes;
32use vortex_array::stats::stats_from_bitset_bytes;
33use vortex_error::VortexExpect;
34use vortex_error::VortexResult;
35use vortex_error::vortex_bail;
36use vortex_error::vortex_ensure;
37use vortex_error::vortex_ensure_eq;
38use vortex_error::vortex_panic;
39use vortex_session::VortexSession;
40use vortex_session::registry::ReadContext;
41
42use crate::LayoutChildType;
43use crate::LayoutEncodingRef;
44use crate::LayoutId;
45use crate::LayoutReaderRef;
46use crate::LayoutRef;
47use crate::VTable;
48use crate::children::LayoutChildren;
49use crate::children::OwnedLayoutChildren;
50use crate::layouts::zoned::reader::ZonedReader;
51use crate::layouts::zoned::schema::stats_table_dtype;
52use crate::segments::SegmentId;
53use crate::segments::SegmentSource;
54use crate::vtable;
55
56vtable!(Zoned);
57
58impl VTable for Zoned {
59 type Layout = ZonedLayout;
60 type Encoding = ZonedLayoutEncoding;
61 type Metadata = ZonedMetadata;
62
63 fn id(_encoding: &Self::Encoding) -> LayoutId {
64 LayoutId::new("vortex.stats")
66 }
67
68 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
69 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
70 }
71
72 fn row_count(layout: &Self::Layout) -> u64 {
73 layout.children.child_row_count(0)
74 }
75
76 fn dtype(layout: &Self::Layout) -> &DType {
77 &layout.dtype
78 }
79
80 fn metadata(layout: &Self::Layout) -> Self::Metadata {
81 ZonedMetadata {
82 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
83 present_stats: Arc::clone(&layout.present_stats),
84 }
85 }
86
87 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
88 vec![]
89 }
90
91 fn nchildren(_layout: &Self::Layout) -> usize {
92 2
93 }
94
95 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
96 match idx {
97 0 => layout.children.child(0, layout.dtype()),
98 1 => layout
99 .children
100 .child(1, &stats_table_dtype(layout.dtype(), &layout.present_stats)),
101 _ => vortex_bail!("Invalid child index: {}", idx),
102 }
103 }
104
105 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
106 match idx {
107 0 => LayoutChildType::Transparent("data".into()),
108 1 => LayoutChildType::Auxiliary("zones".into()),
109 _ => vortex_panic!("Invalid child index: {}", idx),
110 }
111 }
112
113 fn new_reader(
114 layout: &Self::Layout,
115 name: Arc<str>,
116 segment_source: Arc<dyn SegmentSource>,
117 session: &VortexSession,
118 ctx: &crate::LayoutReaderContext,
119 ) -> VortexResult<LayoutReaderRef> {
120 Ok(Arc::new(ZonedReader::try_new(
121 layout.clone(),
122 name,
123 segment_source,
124 session.clone(),
125 ctx.clone(),
126 )?))
127 }
128
129 fn build(
130 _encoding: &Self::Encoding,
131 dtype: &DType,
132 _row_count: u64,
133 metadata: &ZonedMetadata,
134 _segment_ids: Vec<SegmentId>,
135 children: &dyn LayoutChildren,
136 _ctx: &ReadContext,
137 ) -> VortexResult<Self::Layout> {
138 vortex_ensure_eq!(
139 children.nchildren(),
140 2,
141 "ZonedLayout expects exactly 2 children (data, zones)"
142 );
143 Ok(ZonedLayout {
144 dtype: dtype.clone(),
145 children: children.to_arc(),
146 zone_len: metadata.zone_len as usize,
147 present_stats: Arc::clone(&metadata.present_stats),
148 })
149 }
150
151 fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
152 if children.len() != 2 {
153 vortex_bail!(
154 "ZonedLayout expects exactly 2 children (data, zones), got {}",
155 children.len()
156 );
157 }
158 layout.children = OwnedLayoutChildren::layout_children(children);
159 Ok(())
160 }
161}
162
/// Encoding type of the zoned layout; it is the `Encoding` associated type of the
/// `VTable` implementation for `Zoned`.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
166
167#[derive(Clone, Debug)]
173pub struct ZonedLayout {
174 dtype: DType,
175 children: Arc<dyn LayoutChildren>,
176 zone_len: usize,
177 present_stats: Arc<[Stat]>,
178}
179
180impl ZonedLayout {
181 pub fn new(
182 data: LayoutRef,
183 zones: LayoutRef,
184 zone_len: usize,
185 present_stats: Arc<[Stat]>,
186 ) -> Self {
187 if zone_len == 0 {
188 vortex_panic!("Zone length must be greater than 0");
189 }
190 let expected_dtype = stats_table_dtype(data.dtype(), &present_stats);
191 if zones.dtype() != &expected_dtype {
192 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
193 }
194 Self {
195 dtype: data.dtype().clone(),
196 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
197 zone_len,
198 present_stats,
199 }
200 }
201
202 pub fn nzones(&self) -> usize {
203 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
204 }
205
206 pub fn zone_len(&self) -> usize {
207 self.zone_len
208 }
209
210 pub fn present_stats(&self) -> &Arc<[Stat]> {
212 &self.present_stats
213 }
214}
215
216#[derive(Debug, PartialEq, Eq, Clone)]
221pub struct ZonedMetadata {
222 pub(super) zone_len: u32,
223 pub(super) present_stats: Arc<[Stat]>,
224}
225
226impl DeserializeMetadata for ZonedMetadata {
227 type Output = Self;
228
229 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
230 vortex_ensure!(
231 metadata.len() >= 4,
232 "Zoned metadata must contain at least 4 bytes for zone length, got {}",
233 metadata.len()
234 );
235
236 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
240 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
241
242 Ok(Self {
243 zone_len,
244 present_stats,
245 })
246 }
247}
248
249impl SerializeMetadata for ZonedMetadata {
250 fn serialize(self) -> Vec<u8> {
251 let mut metadata = vec![];
252 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
254 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
256 metadata
257 }
258}
259
#[cfg(test)]
mod tests {
    use std::panic;

    use rstest::rstest;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_session::registry::ReadContext;

    use super::*;
    use crate::IntoLayout;
    use crate::children::OwnedLayoutChildren;
    use crate::layouts::flat::FlatLayout;
    use crate::segments::SegmentId;

    /// Metadata survives a serialize/deserialize round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats supplied out of order come back sorted, with the same count but a different
    /// sequence than the input.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = original.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(decoded.present_stats.len(), original.present_stats.len());
        assert_ne!(decoded.present_stats, original.present_stats);
    }

    /// Inputs shorter than the 4-byte zone length prefix are rejected.
    #[rstest]
    #[case(vec![])]
    #[case(vec![0])]
    #[case(vec![0, 0])]
    #[case(vec![0, 0, 0])]
    fn test_deserialize_short_metadata_errors(#[case] metadata: Vec<u8>) {
        let outcome = ZonedMetadata::deserialize(&metadata);
        assert!(outcome.is_err());
    }

    /// Empty input must surface as an `Err`, never as a panic.
    #[test]
    fn test_deserialize_short_metadata_returns_error_not_panic() {
        let unwind_result = panic::catch_unwind(|| ZonedMetadata::deserialize(&[]));
        assert!(
            unwind_result.is_ok(),
            "deserialize should return an error, not panic"
        );
        assert!(unwind_result.unwrap().is_err());
    }

    /// A zero zone length with no stat bytes deserializes successfully.
    #[test]
    fn test_deserialize_zero_zone_len_is_allowed_for_backcompat() {
        let bytes = 0u32.to_le_bytes();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(decoded.zone_len, 0);
        assert!(decoded.present_stats.is_empty());
    }

    /// `build` accepts metadata whose zone length is zero.
    #[test]
    fn test_build_allows_zero_zone_len_for_backcompat() -> VortexResult<()> {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let read_ctx = ReadContext::new([]);

        let data_child =
            FlatLayout::new(0, dtype.clone(), SegmentId::from(0), read_ctx.clone()).into_layout();
        let zones_child = FlatLayout::new(
            0,
            stats_table_dtype(&dtype, &[]),
            SegmentId::from(1),
            read_ctx,
        )
        .into_layout();
        let children = OwnedLayoutChildren::layout_children(vec![data_child, zones_child]);

        let metadata = ZonedMetadata {
            zone_len: 0,
            present_stats: Arc::new([]),
        };
        let layout = <Zoned as VTable>::build(
            &ZonedLayoutEncoding,
            &dtype,
            0,
            &metadata,
            vec![],
            children.as_ref(),
            &ReadContext::new([]),
        )?;

        assert_eq!(layout.zone_len, 0);
        Ok(())
    }

    /// `build` fails when the number of children is not two.
    #[test]
    fn test_build_rejects_invalid_child_count() {
        let metadata = ZonedMetadata {
            zone_len: 3,
            present_stats: Arc::new([]),
        };
        let no_children = OwnedLayoutChildren::layout_children(vec![]);
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);

        let outcome = <Zoned as VTable>::build(
            &ZonedLayoutEncoding,
            &dtype,
            0,
            &metadata,
            vec![],
            no_children.as_ref(),
            &ReadContext::new([]),
        );

        assert!(outcome.is_err());
    }
}