vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13pub use builder::lower_bound;
14pub use builder::upper_bound;
15use vortex_array::ArrayContext;
16use vortex_array::DeserializeMetadata;
17use vortex_array::SerializeMetadata;
18use vortex_array::expr::stats::Stat;
19use vortex_array::stats::as_stat_bitset_bytes;
20use vortex_array::stats::stats_from_bitset_bytes;
21use vortex_dtype::DType;
22use vortex_dtype::TryFromBytes;
23use vortex_error::VortexExpect;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_panic;
27use vortex_session::VortexSession;
28
29use crate::LayoutChildType;
30use crate::LayoutEncodingRef;
31use crate::LayoutId;
32use crate::LayoutReaderRef;
33use crate::LayoutRef;
34use crate::VTable;
35use crate::children::LayoutChildren;
36use crate::children::OwnedLayoutChildren;
37use crate::layouts::zoned::reader::ZonedReader;
38use crate::layouts::zoned::zone_map::ZoneMap;
39use crate::segments::SegmentId;
40use crate::segments::SegmentSource;
41use crate::vtable;
42
43vtable!(Zoned);
44
45impl VTable for ZonedVTable {
46 type Layout = ZonedLayout;
47 type Encoding = ZonedLayoutEncoding;
48 type Metadata = ZonedMetadata;
49
50 fn id(_encoding: &Self::Encoding) -> LayoutId {
51 LayoutId::new_ref("vortex.stats") }
53
54 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
55 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
56 }
57
58 fn row_count(layout: &Self::Layout) -> u64 {
59 layout.children.child_row_count(0)
60 }
61
62 fn dtype(layout: &Self::Layout) -> &DType {
63 &layout.dtype
64 }
65
66 fn metadata(layout: &Self::Layout) -> Self::Metadata {
67 ZonedMetadata {
68 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
69 present_stats: layout.present_stats.clone(),
70 }
71 }
72
73 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
74 vec![]
75 }
76
77 fn nchildren(_layout: &Self::Layout) -> usize {
78 2
79 }
80
81 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
82 match idx {
83 0 => layout.children.child(0, layout.dtype()),
84 1 => layout.children.child(
85 1,
86 &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
87 ),
88 _ => vortex_bail!("Invalid child index: {}", idx),
89 }
90 }
91
92 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
93 match idx {
94 0 => LayoutChildType::Transparent("data".into()),
95 1 => LayoutChildType::Auxiliary("zones".into()),
96 _ => vortex_panic!("Invalid child index: {}", idx),
97 }
98 }
99
100 fn new_reader(
101 layout: &Self::Layout,
102 name: Arc<str>,
103 segment_source: Arc<dyn SegmentSource>,
104 session: &VortexSession,
105 ) -> VortexResult<LayoutReaderRef> {
106 Ok(Arc::new(ZonedReader::try_new(
107 layout.clone(),
108 name,
109 segment_source,
110 session.clone(),
111 )?))
112 }
113
114 #[cfg(gpu_unstable)]
115 fn new_gpu_reader(
116 layout: &Self::Layout,
117 name: Arc<str>,
118 segment_source: Arc<dyn SegmentSource>,
119 ctx: Arc<cudarc::driver::CudaContext>,
120 ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
121 layout
123 .children
124 .child(0, layout.dtype())?
125 .new_gpu_reader(name, segment_source, ctx)
126 }
127
128 fn build(
129 _encoding: &Self::Encoding,
130 dtype: &DType,
131 _row_count: u64,
132 metadata: &ZonedMetadata,
133 _segment_ids: Vec<SegmentId>,
134 children: &dyn LayoutChildren,
135 _ctx: ArrayContext,
136 ) -> VortexResult<Self::Layout> {
137 Ok(ZonedLayout {
138 dtype: dtype.clone(),
139 children: children.to_arc(),
140 zone_len: metadata.zone_len as usize,
141 present_stats: metadata.present_stats.clone(),
142 })
143 }
144}
145
/// Marker type for the zoned layout encoding; it carries no state.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
148
149#[derive(Clone, Debug)]
150pub struct ZonedLayout {
151 dtype: DType,
152 children: Arc<dyn LayoutChildren>,
153 zone_len: usize,
154 present_stats: Arc<[Stat]>,
155}
156
157impl ZonedLayout {
158 pub fn new(
159 data: LayoutRef,
160 zones: LayoutRef,
161 zone_len: usize,
162 present_stats: Arc<[Stat]>,
163 ) -> Self {
164 if zone_len == 0 {
165 vortex_panic!("Zone length must be greater than 0");
166 }
167 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
168 if zones.dtype() != &expected_dtype {
169 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
170 }
171 Self {
172 dtype: data.dtype().clone(),
173 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
174 zone_len,
175 present_stats,
176 }
177 }
178
179 pub fn nzones(&self) -> usize {
180 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
181 }
182
183 pub fn present_stats(&self) -> &Arc<[Stat]> {
185 &self.present_stats
186 }
187}
188
189#[derive(Debug, PartialEq, Eq, Clone)]
190pub struct ZonedMetadata {
191 pub(super) zone_len: u32,
192 pub(super) present_stats: Arc<[Stat]>,
193}
194
195impl DeserializeMetadata for ZonedMetadata {
196 type Output = Self;
197
198 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
199 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
200 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
201 Ok(Self {
202 zone_len,
203 present_stats,
204 })
205 }
206}
207
208impl SerializeMetadata for ZonedMetadata {
209 fn serialize(self) -> Vec<u8> {
210 let mut metadata = vec![];
211 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
213 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
215 metadata
216 }
217}
218
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing must reproduce the metadata exactly when the
    /// stats are listed in sorted order.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsConstant, Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats supplied out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = unsorted.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(round_tripped.present_stats.is_sorted());
        assert_eq!(
            round_tripped.present_stats.len(),
            unsorted.present_stats.len()
        );
        assert_ne!(round_tripped.present_stats, unsorted.present_stats);
    }
}