vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::{MAX_IS_TRUNCATED, MIN_IS_TRUNCATED, lower_bound, upper_bound};
12use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
13use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
14use vortex_dtype::{DType, TryFromBytes};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
16
17use crate::children::{LayoutChildren, OwnedLayoutChildren};
18use crate::layouts::zoned::reader::ZonedReader;
19use crate::layouts::zoned::zone_map::ZoneMap;
20use crate::segments::{SegmentId, SegmentSource};
21use crate::{
22 LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
23};
24
// Invokes the crate-level `vtable!` macro for the `Zoned` layout.
// NOTE(review): presumably this declares the `ZonedVTable` type that the `impl VTable`
// below is written against — confirm against the macro definition.
vtable!(Zoned);
26
27impl VTable for ZonedVTable {
28 type Layout = ZonedLayout;
29 type Encoding = ZonedLayoutEncoding;
30 type Metadata = ZonedMetadata;
31
32 fn id(_encoding: &Self::Encoding) -> LayoutId {
33 LayoutId::new_ref("vortex.stats") }
35
36 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
37 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
38 }
39
40 fn row_count(layout: &Self::Layout) -> u64 {
41 layout.children.child_row_count(0)
42 }
43
44 fn dtype(layout: &Self::Layout) -> &DType {
45 &layout.dtype
46 }
47
48 fn metadata(layout: &Self::Layout) -> Self::Metadata {
49 ZonedMetadata {
50 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
51 present_stats: layout.present_stats.clone(),
52 }
53 }
54
55 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
56 vec![]
57 }
58
59 fn nchildren(_layout: &Self::Layout) -> usize {
60 2
61 }
62
63 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
64 match idx {
65 0 => layout.children.child(0, layout.dtype()),
66 1 => layout.children.child(
67 1,
68 &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
69 ),
70 _ => vortex_bail!("Invalid child index: {}", idx),
71 }
72 }
73
74 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
75 match idx {
76 0 => LayoutChildType::Transparent("data".into()),
77 1 => LayoutChildType::Auxiliary("zones".into()),
78 _ => vortex_panic!("Invalid child index: {}", idx),
79 }
80 }
81
82 fn new_reader(
83 layout: &Self::Layout,
84 name: Arc<str>,
85 segment_source: Arc<dyn SegmentSource>,
86 ) -> VortexResult<LayoutReaderRef> {
87 Ok(Arc::new(ZonedReader::try_new(
88 layout.clone(),
89 name,
90 segment_source,
91 )?))
92 }
93
94 #[cfg(gpu_unstable)]
95 fn new_gpu_reader(
96 layout: &Self::Layout,
97 name: Arc<str>,
98 segment_source: Arc<dyn SegmentSource>,
99 ctx: Arc<cudarc::driver::CudaContext>,
100 ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
101 layout
103 .children
104 .child(0, layout.dtype())?
105 .new_gpu_reader(name, segment_source, ctx)
106 }
107
108 fn build(
109 _encoding: &Self::Encoding,
110 dtype: &DType,
111 _row_count: u64,
112 metadata: &ZonedMetadata,
113 _segment_ids: Vec<SegmentId>,
114 children: &dyn LayoutChildren,
115 _ctx: ArrayContext,
116 ) -> VortexResult<Self::Layout> {
117 Ok(ZonedLayout {
118 dtype: dtype.clone(),
119 children: children.to_arc(),
120 zone_len: metadata.zone_len as usize,
121 present_stats: metadata.present_stats.clone(),
122 })
123 }
124}
125
/// Stateless marker type for the zoned layout encoding.
///
/// Used as the `Encoding` associated type of the `VTable` implementation in this module.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
128
/// A layout that pairs a data child with a table of per-zone statistics.
///
/// Child 0 is the data layout and child 1 is the zones (statistics) layout.
#[derive(Clone, Debug)]
pub struct ZonedLayout {
    // The dtype of the data child; also reported as the dtype of this layout.
    dtype: DType,
    // The two children: index 0 is the data, index 1 is the zones table.
    children: Arc<dyn LayoutChildren>,
    // The zone length; `ZonedLayout::new` rejects zero.
    // NOTE(review): presumably the number of rows covered by each zone — confirm in the reader.
    zone_len: usize,
    // The statistics present in the zones table; used to derive the zones child dtype.
    present_stats: Arc<[Stat]>,
}
136
137impl ZonedLayout {
138 pub fn new(
139 data: LayoutRef,
140 zones: LayoutRef,
141 zone_len: usize,
142 present_stats: Arc<[Stat]>,
143 ) -> Self {
144 if zone_len == 0 {
145 vortex_panic!("Zone length must be greater than 0");
146 }
147 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
148 if zones.dtype() != &expected_dtype {
149 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
150 }
151 Self {
152 dtype: data.dtype().clone(),
153 children: OwnedLayoutChildren::layout_children(vec![data, zones]),
154 zone_len,
155 present_stats,
156 }
157 }
158
159 pub fn nzones(&self) -> usize {
160 usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
161 }
162
163 pub fn present_stats(&self) -> &Arc<[Stat]> {
165 &self.present_stats
166 }
167}
168
/// Serializable metadata of a [`ZonedLayout`].
///
/// The serialized form is the zone length as 4 little-endian bytes followed by the
/// bitset bytes of the present statistics.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ZonedMetadata {
    // The zone length, stored as a `u32` in the serialized form.
    pub(super) zone_len: u32,
    // The statistics present in the zones table.
    pub(super) present_stats: Arc<[Stat]>,
}
174
175impl DeserializeMetadata for ZonedMetadata {
176 type Output = Self;
177
178 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
179 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
180 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
181 Ok(Self {
182 zone_len,
183 present_stats,
184 })
185 }
186}
187
188impl SerializeMetadata for ZonedMetadata {
189 fn serialize(self) -> Vec<u8> {
190 let mut metadata = vec![];
191 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
193 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
195 metadata
196 }
197}
198
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializes a copy of `metadata` and parses the resulting bytes back.
    fn roundtrip(metadata: &ZonedMetadata) -> ZonedMetadata {
        let bytes = metadata.clone().serialize();
        ZonedMetadata::deserialize(&bytes).unwrap()
    }

    /// Metadata whose stats are already in sorted order must survive a round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let restored = roundtrip(&metadata);
        assert_eq!(restored, metadata);
    }

    /// Stats given out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let restored = roundtrip(&original);

        assert!(restored.present_stats.is_sorted());
        assert_eq!(
            restored.present_stats.len(),
            original.present_stats.len()
        );
        assert_ne!(restored.present_stats, original.present_stats);
    }
}