vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::{MAX_IS_TRUNCATED, MIN_IS_TRUNCATED, lower_bound, upper_bound};
12use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
13use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
14use vortex_dtype::{DType, TryFromBytes};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
16
17use crate::children::LayoutChildren;
18use crate::layouts::zoned::reader::ZonedReader;
19use crate::layouts::zoned::zone_map::ZoneMap;
20use crate::segments::{SegmentId, SegmentSource};
21use crate::{
22 LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
23};
24
25vtable!(Zoned);
26
27impl VTable for ZonedVTable {
28 type Layout = ZonedLayout;
29 type Encoding = ZonedLayoutEncoding;
30 type Metadata = ZonedMetadata;
31
32 fn id(_encoding: &Self::Encoding) -> LayoutId {
33 LayoutId::new_ref("vortex.stats") }
35
36 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
37 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
38 }
39
40 fn row_count(layout: &Self::Layout) -> u64 {
41 layout.data.row_count()
42 }
43
44 fn dtype(layout: &Self::Layout) -> &DType {
45 layout.data.dtype()
46 }
47
48 fn metadata(layout: &Self::Layout) -> Self::Metadata {
49 ZonedMetadata {
50 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
51 present_stats: layout.present_stats.clone(),
52 }
53 }
54
55 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
56 vec![]
57 }
58
59 fn nchildren(_layout: &Self::Layout) -> usize {
60 2
61 }
62
63 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
64 match idx {
65 0 => Ok(layout.data.clone()),
66 1 => Ok(layout.zones.clone()),
67 _ => vortex_bail!("Invalid child index: {}", idx),
68 }
69 }
70
71 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
72 match idx {
73 0 => LayoutChildType::Transparent("data".into()),
74 1 => LayoutChildType::Auxiliary("zones".into()),
75 _ => vortex_panic!("Invalid child index: {}", idx),
76 }
77 }
78
79 fn new_reader(
80 layout: &Self::Layout,
81 name: Arc<str>,
82 segment_source: Arc<dyn SegmentSource>,
83 ) -> VortexResult<LayoutReaderRef> {
84 Ok(Arc::new(ZonedReader::try_new(
85 layout.clone(),
86 name,
87 segment_source,
88 )?))
89 }
90
91 fn build(
92 _encoding: &Self::Encoding,
93 dtype: &DType,
94 _row_count: u64,
95 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96 _segment_ids: Vec<SegmentId>,
97 children: &dyn LayoutChildren,
98 _ctx: ArrayContext,
99 ) -> VortexResult<Self::Layout> {
100 let data = children.child(0, dtype)?;
101
102 let zones_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &metadata.present_stats);
103 let zones = children.child(1, &zones_dtype)?;
104
105 Ok(ZonedLayout::new(
106 data,
107 zones,
108 metadata.zone_len as usize,
109 metadata.present_stats.clone(),
110 ))
111 }
112}
113
/// Stateless encoding marker for the zoned layout.
///
/// It is the `Encoding` associated type of the `VTable` implementation for `ZonedVTable`.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
116
117#[derive(Clone, Debug)]
118pub struct ZonedLayout {
119 data: LayoutRef,
120 zones: LayoutRef,
121 zone_len: usize,
122 present_stats: Arc<[Stat]>,
123}
124
125impl ZonedLayout {
126 pub fn new(
127 data: LayoutRef,
128 zones: LayoutRef,
129 zone_len: usize,
130 present_stats: Arc<[Stat]>,
131 ) -> Self {
132 if zone_len == 0 {
133 vortex_panic!("Zone length must be greater than 0");
134 }
135 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
136 if zones.dtype() != &expected_dtype {
137 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
138 }
139 Self {
140 data,
141 zones,
142 zone_len,
143 present_stats,
144 }
145 }
146
147 pub fn nzones(&self) -> usize {
148 usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
149 }
150
151 pub fn present_stats(&self) -> &Arc<[Stat]> {
153 &self.present_stats
154 }
155}
156
157#[derive(Debug, PartialEq, Eq, Clone)]
158pub struct ZonedMetadata {
159 pub(super) zone_len: u32,
160 pub(super) present_stats: Arc<[Stat]>,
161}
162
163impl DeserializeMetadata for ZonedMetadata {
164 type Output = Self;
165
166 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
167 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
168 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
169 Ok(Self {
170 zone_len,
171 present_stats,
172 })
173 }
174}
175
176impl SerializeMetadata for ZonedMetadata {
177 fn serialize(self) -> Vec<u8> {
178 let mut metadata = vec![];
179 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
181 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
183 metadata
184 }
185}
186
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing metadata yields a value equal to the input.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };

        let bytes = original.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(decoded.present_stats.len(), original.present_stats.len());
        assert_ne!(decoded.present_stats, original.present_stats);
    }
}