vortex_layout/layouts/zoned/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13pub use builder::lower_bound;
14pub use builder::upper_bound;
15use vortex_array::ArrayContext;
16use vortex_array::DeserializeMetadata;
17use vortex_array::SerializeMetadata;
18use vortex_array::expr::stats::Stat;
19use vortex_array::stats::as_stat_bitset_bytes;
20use vortex_array::stats::stats_from_bitset_bytes;
21use vortex_dtype::DType;
22use vortex_dtype::TryFromBytes;
23use vortex_error::VortexExpect;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_panic;
27use vortex_session::VortexSession;
28
29use crate::LayoutChildType;
30use crate::LayoutEncodingRef;
31use crate::LayoutId;
32use crate::LayoutReaderRef;
33use crate::LayoutRef;
34use crate::VTable;
35use crate::children::LayoutChildren;
36use crate::children::OwnedLayoutChildren;
37use crate::layouts::zoned::reader::ZonedReader;
38use crate::layouts::zoned::zone_map::ZoneMap;
39use crate::segments::SegmentId;
40use crate::segments::SegmentSource;
41use crate::vtable;
42
43vtable!(Zoned);
44
45impl VTable for ZonedVTable {
46    type Layout = ZonedLayout;
47    type Encoding = ZonedLayoutEncoding;
48    type Metadata = ZonedMetadata;
49
50    fn id(_encoding: &Self::Encoding) -> LayoutId {
51        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
52    }
53
54    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
55        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
56    }
57
58    fn row_count(layout: &Self::Layout) -> u64 {
59        layout.children.child_row_count(0)
60    }
61
62    fn dtype(layout: &Self::Layout) -> &DType {
63        &layout.dtype
64    }
65
66    fn metadata(layout: &Self::Layout) -> Self::Metadata {
67        ZonedMetadata {
68            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
69            present_stats: layout.present_stats.clone(),
70        }
71    }
72
73    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
74        vec![]
75    }
76
77    fn nchildren(_layout: &Self::Layout) -> usize {
78        2
79    }
80
81    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
82        match idx {
83            0 => layout.children.child(0, layout.dtype()),
84            1 => layout.children.child(
85                1,
86                &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
87            ),
88            _ => vortex_bail!("Invalid child index: {}", idx),
89        }
90    }
91
92    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
93        match idx {
94            0 => LayoutChildType::Transparent("data".into()),
95            1 => LayoutChildType::Auxiliary("zones".into()),
96            _ => vortex_panic!("Invalid child index: {}", idx),
97        }
98    }
99
100    fn new_reader(
101        layout: &Self::Layout,
102        name: Arc<str>,
103        segment_source: Arc<dyn SegmentSource>,
104        session: &VortexSession,
105    ) -> VortexResult<LayoutReaderRef> {
106        Ok(Arc::new(ZonedReader::try_new(
107            layout.clone(),
108            name,
109            segment_source,
110            session.clone(),
111        )?))
112    }
113
114    #[cfg(gpu_unstable)]
115    fn new_gpu_reader(
116        layout: &Self::Layout,
117        name: Arc<str>,
118        segment_source: Arc<dyn SegmentSource>,
119        ctx: Arc<cudarc::driver::CudaContext>,
120    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
121        // skip prunning and immediately return data child
122        layout
123            .children
124            .child(0, layout.dtype())?
125            .new_gpu_reader(name, segment_source, ctx)
126    }
127
128    fn build(
129        _encoding: &Self::Encoding,
130        dtype: &DType,
131        _row_count: u64,
132        metadata: &ZonedMetadata,
133        _segment_ids: Vec<SegmentId>,
134        children: &dyn LayoutChildren,
135        _ctx: ArrayContext,
136    ) -> VortexResult<Self::Layout> {
137        Ok(ZonedLayout {
138            dtype: dtype.clone(),
139            children: children.to_arc(),
140            zone_len: metadata.zone_len as usize,
141            present_stats: metadata.present_stats.clone(),
142        })
143    }
144
145    fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
146        if children.len() != 2 {
147            vortex_bail!(
148                "ZonedLayout expects exactly 2 children (data, zones), got {}",
149                children.len()
150            );
151        }
152        layout.children = OwnedLayoutChildren::layout_children(children);
153        Ok(())
154    }
155}
156
/// Marker type identifying the encoding of a [`ZonedLayout`]; it carries no state.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
159
160#[derive(Clone, Debug)]
161pub struct ZonedLayout {
162    dtype: DType,
163    children: Arc<dyn LayoutChildren>,
164    zone_len: usize,
165    present_stats: Arc<[Stat]>,
166}
167
168impl ZonedLayout {
169    pub fn new(
170        data: LayoutRef,
171        zones: LayoutRef,
172        zone_len: usize,
173        present_stats: Arc<[Stat]>,
174    ) -> Self {
175        if zone_len == 0 {
176            vortex_panic!("Zone length must be greater than 0");
177        }
178        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
179        if zones.dtype() != &expected_dtype {
180            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
181        }
182        Self {
183            dtype: data.dtype().clone(),
184            children: OwnedLayoutChildren::layout_children(vec![data, zones]),
185            zone_len,
186            present_stats,
187        }
188    }
189
190    pub fn nzones(&self) -> usize {
191        usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
192    }
193
194    /// Returns an array of stats that exist in the layout's data, must be sorted.
195    pub fn present_stats(&self) -> &Arc<[Stat]> {
196        &self.present_stats
197    }
198}
199
200#[derive(Debug, PartialEq, Eq, Clone)]
201pub struct ZonedMetadata {
202    pub(super) zone_len: u32,
203    pub(super) present_stats: Arc<[Stat]>,
204}
205
206impl DeserializeMetadata for ZonedMetadata {
207    type Output = Self;
208
209    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
210        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
211        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
212        Ok(Self {
213            zone_len,
214            present_stats,
215        })
216    }
217}
218
219impl SerializeMetadata for ZonedMetadata {
220    fn serialize(self) -> Vec<u8> {
221        let mut metadata = vec![];
222        // First, write the block size to the metadata.
223        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
224        // Then write the bit-set of statistics.
225        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
226        metadata
227    }
228}
229
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Metadata whose stats are listed in sorted order must survive a serialize/deserialize
    /// round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same number of entries, different order.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted: Arc<[Stat]> = Arc::new([Stat::IsStrictSorted, Stat::IsSorted]);
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::clone(&unsorted),
        };

        let bytes = original.serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(decoded.present_stats.len(), unsorted.len());
        assert_ne!(decoded.present_stats, unsorted);
    }
}