vortex_layout/layouts/zoned/
mod.rs1mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::{MAX_IS_TRUNCATED, MIN_IS_TRUNCATED, lower_bound, upper_bound};
12use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
13use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
14use vortex_dtype::{DType, TryFromBytes};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
16
17use crate::children::LayoutChildren;
18use crate::layouts::zoned::reader::ZonedReader;
19use crate::layouts::zoned::zone_map::ZoneMap;
20use crate::segments::{SegmentId, SegmentSource};
21use crate::{
22 LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
23};
24
25vtable!(Zoned);
26
27impl VTable for ZonedVTable {
28 type Layout = ZonedLayout;
29 type Encoding = ZonedLayoutEncoding;
30 type Metadata = ZonedMetadata;
31
32 fn id(_encoding: &Self::Encoding) -> LayoutId {
33 LayoutId::new_ref("vortex.stats") }
35
36 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
37 LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
38 }
39
40 fn row_count(layout: &Self::Layout) -> u64 {
41 layout.data.row_count()
42 }
43
44 fn dtype(layout: &Self::Layout) -> &DType {
45 layout.data.dtype()
46 }
47
48 fn metadata(layout: &Self::Layout) -> Self::Metadata {
49 ZonedMetadata {
50 zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
51 present_stats: layout.present_stats.clone(),
52 }
53 }
54
55 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
56 vec![]
57 }
58
59 fn nchildren(_layout: &Self::Layout) -> usize {
60 2
61 }
62
63 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
64 match idx {
65 0 => Ok(layout.data.clone()),
66 1 => Ok(layout.zones.clone()),
67 _ => vortex_bail!("Invalid child index: {}", idx),
68 }
69 }
70
71 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
72 match idx {
73 0 => LayoutChildType::Transparent("data".into()),
74 1 => LayoutChildType::Auxiliary("zones".into()),
75 _ => vortex_panic!("Invalid child index: {}", idx),
76 }
77 }
78
79 fn new_reader(
80 layout: &Self::Layout,
81 name: Arc<str>,
82 segment_source: Arc<dyn SegmentSource>,
83 ) -> VortexResult<LayoutReaderRef> {
84 Ok(Arc::new(ZonedReader::try_new(
85 layout.clone(),
86 name,
87 segment_source,
88 )?))
89 }
90
91 fn build(
92 _encoding: &Self::Encoding,
93 dtype: &DType,
94 _row_count: u64,
95 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96 _segment_ids: Vec<SegmentId>,
97 children: &dyn LayoutChildren,
98 _ctx: ArrayContext,
99 ) -> VortexResult<Self::Layout> {
100 let data = children.child(0, dtype)?;
101
102 let zones_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &metadata.present_stats);
103 let zones = children.child(1, &zones_dtype)?;
104
105 Ok(ZonedLayout::new(
106 data,
107 zones,
108 metadata.zone_len as usize,
109 metadata.present_stats.clone(),
110 ))
111 }
112}
113
/// Stateless marker type for the zoned layout encoding; used as
/// `VTable::Encoding` for the zoned vtable.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
116
117#[derive(Clone, Debug)]
118pub struct ZonedLayout {
119 data: LayoutRef,
120 zones: LayoutRef,
121 zone_len: usize,
122 present_stats: Arc<[Stat]>,
123}
124
125impl ZonedLayout {
126 pub fn new(
127 data: LayoutRef,
128 zones: LayoutRef,
129 zone_len: usize,
130 present_stats: Arc<[Stat]>,
131 ) -> Self {
132 let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
133 if zones.dtype() != &expected_dtype {
134 vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
135 }
136 Self {
137 data,
138 zones,
139 zone_len,
140 present_stats,
141 }
142 }
143
144 pub fn nzones(&self) -> usize {
145 usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
146 }
147
148 pub fn present_stats(&self) -> &Arc<[Stat]> {
150 &self.present_stats
151 }
152}
153
154#[derive(Debug, PartialEq, Eq, Clone)]
155pub struct ZonedMetadata {
156 pub(super) zone_len: u32,
157 pub(super) present_stats: Arc<[Stat]>,
158}
159
160impl DeserializeMetadata for ZonedMetadata {
161 type Output = Self;
162
163 fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
164 let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
165 let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
166 Ok(Self {
167 zone_len,
168 present_stats,
169 })
170 }
171}
172
173impl SerializeMetadata for ZonedMetadata {
174 fn serialize(self) -> Vec<u8> {
175 let mut metadata = vec![];
176 metadata.extend_from_slice(&self.zone_len.to_le_bytes());
178 metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
180 metadata
181 }
182}
183
#[cfg(test)]
mod tests {

    use rstest::rstest;

    use super::*;

    /// Metadata whose stats are listed in sorted order must survive a
    /// serialize/deserialize round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsConstant, Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats supplied out of order come back sorted: same count, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };

        let bytes = original.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(
            decoded.present_stats.len(),
            original.present_stats.len()
        );
        assert_ne!(decoded.present_stats, original.present_stats);
    }
}