vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13pub use builder::lower_bound;
14pub use builder::upper_bound;
15use vortex_array::ArrayContext;
16use vortex_array::DeserializeMetadata;
17use vortex_array::SerializeMetadata;
18use vortex_array::expr::stats::Stat;
19use vortex_array::stats::as_stat_bitset_bytes;
20use vortex_array::stats::stats_from_bitset_bytes;
21use vortex_dtype::DType;
22use vortex_dtype::TryFromBytes;
23use vortex_error::VortexExpect;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_panic;
27use vortex_session::VortexSession;
28
29use crate::LayoutChildType;
30use crate::LayoutEncodingRef;
31use crate::LayoutId;
32use crate::LayoutReaderRef;
33use crate::LayoutRef;
34use crate::VTable;
35use crate::children::LayoutChildren;
36use crate::children::OwnedLayoutChildren;
37use crate::layouts::zoned::reader::ZonedReader;
38use crate::layouts::zoned::zone_map::ZoneMap;
39use crate::segments::SegmentId;
40use crate::segments::SegmentSource;
41use crate::vtable;
42
// Invokes the crate's `vtable!` macro for the `Zoned` layout.
// NOTE(review): presumably this generates the `ZonedVTable` type and the glue
// connecting it to `ZonedLayout` / `ZonedLayoutEncoding` — confirm against the
// macro definition in the crate root.
vtable!(Zoned);
44
45impl VTable for ZonedVTable {
46 type Layout = ZonedLayout;
47 type Encoding = ZonedLayoutEncoding;
48 type Metadata = ZonedMetadata;
49
50 fn id(_encoding: &Self::Encoding) -> LayoutId {
51 LayoutId::new_ref("vortex.stats") }
53
54 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
55 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
56 }
57
58 fn row_count(layout: &Self::Layout) -> u64 {
59 layout.children.child_row_count(0)
60 }
61
62 fn dtype(layout: &Self::Layout) -> &DType {
63 &layout.dtype
64 }
65
66 fn metadata(layout: &Self::Layout) -> Self::Metadata {
67 ZonedMetadata {
68 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
69 present_stats: layout.present_stats.clone(),
70 }
71 }
72
73 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
74 vec![]
75 }
76
77 fn nchildren(_layout: &Self::Layout) -> usize {
78 2
79 }
80
81 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
82 match idx {
83 0 => layout.children.child(0, layout.dtype()),
84 1 => layout.children.child(
85 1,
86 &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
87 ),
88 _ => vortex_bail!("Invalid child index: {}", idx),
89 }
90 }
91
92 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
93 match idx {
94 0 => LayoutChildType::Transparent("data".into()),
95 1 => LayoutChildType::Auxiliary("zones".into()),
96 _ => vortex_panic!("Invalid child index: {}", idx),
97 }
98 }
99
100 fn new_reader(
101 layout: &Self::Layout,
102 name: Arc<str>,
103 segment_source: Arc<dyn SegmentSource>,
104 session: &VortexSession,
105 ) -> VortexResult<LayoutReaderRef> {
106 Ok(Arc::new(ZonedReader::try_new(
107 layout.clone(),
108 name,
109 segment_source,
110 session.clone(),
111 )?))
112 }
113
114 #[cfg(gpu_unstable)]
115 fn new_gpu_reader(
116 layout: &Self::Layout,
117 name: Arc<str>,
118 segment_source: Arc<dyn SegmentSource>,
119 ctx: Arc<cudarc::driver::CudaContext>,
120 ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
121 layout
123 .children
124 .child(0, layout.dtype())?
125 .new_gpu_reader(name, segment_source, ctx)
126 }
127
128 fn build(
129 _encoding: &Self::Encoding,
130 dtype: &DType,
131 _row_count: u64,
132 metadata: &ZonedMetadata,
133 _segment_ids: Vec<SegmentId>,
134 children: &dyn LayoutChildren,
135 _ctx: ArrayContext,
136 ) -> VortexResult<Self::Layout> {
137 Ok(ZonedLayout {
138 dtype: dtype.clone(),
139 children: children.to_arc(),
140 zone_len: metadata.zone_len as usize,
141 present_stats: metadata.present_stats.clone(),
142 })
143 }
144
145 fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
146 if children.len() != 2 {
147 vortex_bail!(
148 "ZonedLayout expects exactly 2 children (data, zones), got {}",
149 children.len()
150 );
151 }
152 layout.children = OwnedLayoutChildren::layout_children(children);
153 Ok(())
154 }
155}
156
/// Stateless marker type used as the `Encoding` of the zoned layout vtable.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
159
/// A layout that pairs a data child with a "zones" child.
///
/// Child 0 is the data layout and child 1 is the zones layout, whose dtype is
/// the one produced by `ZoneMap::dtype_for_stats_table` for `dtype` and
/// `present_stats`.
#[derive(Clone, Debug)]
pub struct ZonedLayout {
    // Dtype of the data child; also reported as the dtype of the whole layout.
    dtype: DType,
    // The two children: index 0 is the data, index 1 is the zones.
    children: Arc<dyn LayoutChildren>,
    // Zone length; `ZonedLayout::new` rejects 0.
    // NOTE(review): presumably the number of rows covered by one zone — confirm.
    zone_len: usize,
    // Statistics recorded in the zones child; used to derive the zones dtype.
    present_stats: Arc<[Stat]>,
}
167
168impl ZonedLayout {
169 pub fn new(
170 data: LayoutRef,
171 zones: LayoutRef,
172 zone_len: usize,
173 present_stats: Arc<[Stat]>,
174 ) -> Self {
175 if zone_len == 0 {
176 vortex_panic!("Zone length must be greater than 0");
177 }
178 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
179 if zones.dtype() != &expected_dtype {
180 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
181 }
182 Self {
183 dtype: data.dtype().clone(),
184 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
185 zone_len,
186 present_stats,
187 }
188 }
189
190 pub fn nzones(&self) -> usize {
191 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
192 }
193
194 pub fn present_stats(&self) -> &Arc<[Stat]> {
196 &self.present_stats
197 }
198}
199
/// Serializable metadata of a zoned layout.
///
/// The serialized form is the little-endian `zone_len` (4 bytes) followed by
/// the bitset bytes produced by `as_stat_bitset_bytes` for `present_stats`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ZonedMetadata {
    // Zone length, narrowed to `u32` for serialization.
    pub(super) zone_len: u32,
    // Statistics present in the zones child.
    pub(super) present_stats: Arc<[Stat]>,
}
205
206impl DeserializeMetadata for ZonedMetadata {
207 type Output = Self;
208
209 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
210 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
211 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
212 Ok(Self {
213 zone_len,
214 present_stats,
215 })
216 }
217}
218
219impl SerializeMetadata for ZonedMetadata {
220 fn serialize(self) -> Vec<u8> {
221 let mut metadata = vec![];
222 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
224 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
226 metadata
227 }
228}
229
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing metadata must reproduce the input.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = unsorted.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(round_tripped.present_stats.is_sorted());
        assert_eq!(
            round_tripped.present_stats.len(),
            unsorted.present_stats.len()
        );
        assert_ne!(round_tripped.present_stats, unsorted.present_stats);
    }
}