Skip to main content

vortex_layout/layouts/zoned/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13use vortex_array::ArrayContext;
14use vortex_array::DeserializeMetadata;
15use vortex_array::SerializeMetadata;
16use vortex_array::expr::stats::Stat;
17use vortex_array::stats::as_stat_bitset_bytes;
18use vortex_array::stats::stats_from_bitset_bytes;
19use vortex_dtype::DType;
20use vortex_dtype::TryFromBytes;
21use vortex_error::VortexExpect;
22use vortex_error::VortexResult;
23use vortex_error::vortex_bail;
24use vortex_error::vortex_panic;
25use vortex_session::VortexSession;
26
27use crate::LayoutChildType;
28use crate::LayoutEncodingRef;
29use crate::LayoutId;
30use crate::LayoutReaderRef;
31use crate::LayoutRef;
32use crate::VTable;
33use crate::children::LayoutChildren;
34use crate::children::OwnedLayoutChildren;
35use crate::layouts::zoned::reader::ZonedReader;
36use crate::layouts::zoned::zone_map::ZoneMap;
37use crate::segments::SegmentId;
38use crate::segments::SegmentSource;
39use crate::vtable;
40
41vtable!(Zoned);
42
43impl VTable for ZonedVTable {
44    type Layout = ZonedLayout;
45    type Encoding = ZonedLayoutEncoding;
46    type Metadata = ZonedMetadata;
47
48    fn id(_encoding: &Self::Encoding) -> LayoutId {
49        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
50    }
51
52    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
53        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
54    }
55
56    fn row_count(layout: &Self::Layout) -> u64 {
57        layout.children.child_row_count(0)
58    }
59
60    fn dtype(layout: &Self::Layout) -> &DType {
61        &layout.dtype
62    }
63
64    fn metadata(layout: &Self::Layout) -> Self::Metadata {
65        ZonedMetadata {
66            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
67            present_stats: layout.present_stats.clone(),
68        }
69    }
70
71    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
72        vec![]
73    }
74
75    fn nchildren(_layout: &Self::Layout) -> usize {
76        2
77    }
78
79    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
80        match idx {
81            0 => layout.children.child(0, layout.dtype()),
82            1 => layout.children.child(
83                1,
84                &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
85            ),
86            _ => vortex_bail!("Invalid child index: {}", idx),
87        }
88    }
89
90    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
91        match idx {
92            0 => LayoutChildType::Transparent("data".into()),
93            1 => LayoutChildType::Auxiliary("zones".into()),
94            _ => vortex_panic!("Invalid child index: {}", idx),
95        }
96    }
97
98    fn new_reader(
99        layout: &Self::Layout,
100        name: Arc<str>,
101        segment_source: Arc<dyn SegmentSource>,
102        session: &VortexSession,
103    ) -> VortexResult<LayoutReaderRef> {
104        Ok(Arc::new(ZonedReader::try_new(
105            layout.clone(),
106            name,
107            segment_source,
108            session.clone(),
109        )?))
110    }
111
112    fn build(
113        _encoding: &Self::Encoding,
114        dtype: &DType,
115        _row_count: u64,
116        metadata: &ZonedMetadata,
117        _segment_ids: Vec<SegmentId>,
118        children: &dyn LayoutChildren,
119        _ctx: &ArrayContext,
120    ) -> VortexResult<Self::Layout> {
121        Ok(ZonedLayout {
122            dtype: dtype.clone(),
123            children: children.to_arc(),
124            zone_len: metadata.zone_len as usize,
125            present_stats: metadata.present_stats.clone(),
126        })
127    }
128
129    fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
130        if children.len() != 2 {
131            vortex_bail!(
132                "ZonedLayout expects exactly 2 children (data, zones), got {}",
133                children.len()
134            );
135        }
136        layout.children = OwnedLayoutChildren::layout_children(children);
137        Ok(())
138    }
139}
140
/// Marker type for the zoned layout encoding.
///
/// It carries no data; the `VTable` implementation for `ZonedVTable` uses it as its
/// `Encoding` type.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
143
144#[derive(Clone, Debug)]
145pub struct ZonedLayout {
146    dtype: DType,
147    children: Arc<dyn LayoutChildren>,
148    zone_len: usize,
149    present_stats: Arc<[Stat]>,
150}
151
152impl ZonedLayout {
153    pub fn new(
154        data: LayoutRef,
155        zones: LayoutRef,
156        zone_len: usize,
157        present_stats: Arc<[Stat]>,
158    ) -> Self {
159        if zone_len == 0 {
160            vortex_panic!("Zone length must be greater than 0");
161        }
162        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
163        if zones.dtype() != &expected_dtype {
164            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
165        }
166        Self {
167            dtype: data.dtype().clone(),
168            children: OwnedLayoutChildren::layout_children(vec![data, zones]),
169            zone_len,
170            present_stats,
171        }
172    }
173
174    pub fn nzones(&self) -> usize {
175        usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
176    }
177
178    /// Returns an array of stats that exist in the layout's data, must be sorted.
179    pub fn present_stats(&self) -> &Arc<[Stat]> {
180        &self.present_stats
181    }
182}
183
184#[derive(Debug, PartialEq, Eq, Clone)]
185pub struct ZonedMetadata {
186    pub(super) zone_len: u32,
187    pub(super) present_stats: Arc<[Stat]>,
188}
189
190impl DeserializeMetadata for ZonedMetadata {
191    type Output = Self;
192
193    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
194        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
195        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
196        Ok(Self {
197            zone_len,
198            present_stats,
199        })
200    }
201}
202
203impl SerializeMetadata for ZonedMetadata {
204    fn serialize(self) -> Vec<u8> {
205        let mut metadata = vec![];
206        // First, write the block size to the metadata.
207        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
208        // Then write the bit-set of statistics.
209        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
210        metadata
211    }
212}
213
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Metadata whose stats are already in sorted order must survive a
    /// serialize/deserialize round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([]),
        })]
    #[case(ZonedMetadata {
            zone_len: 0,
            present_stats: Arc::new([Stat::IsConstant]),
        })]
    #[case::all_sorted(ZonedMetadata {
            zone_len: 314,
            present_stats: Arc::new([
                Stat::IsConstant,
                Stat::IsSorted,
                Stat::IsStrictSorted,
                Stat::Max,
                Stat::Min,
                Stat::Sum,
                Stat::NullCount,
                Stat::UncompressedSizeInBytes,
                Stat::NaNCount,
            ]),
        })]
    #[case::some_sorted(ZonedMetadata {
            zone_len: 314,
            present_stats: Arc::new([
                Stat::IsSorted,
                Stat::IsStrictSorted,
                Stat::Max,
                Stat::Min,
                Stat::Sum,
                Stat::NullCount,
                Stat::UncompressedSizeInBytes,
                Stat::NaNCount,
            ]),
        })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats supplied out of order come back sorted: same number of stats, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = unsorted.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(round_tripped.present_stats.is_sorted());
        assert_eq!(
            round_tripped.present_stats.len(),
            unsorted.present_stats.len()
        );
        assert_ne!(round_tripped.present_stats, unsorted.present_stats);
    }
}