vortex_layout/layouts/zoned/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::MAX_IS_TRUNCATED;
12pub use builder::MIN_IS_TRUNCATED;
13pub use builder::lower_bound;
14pub use builder::upper_bound;
15use vortex_array::ArrayContext;
16use vortex_array::DeserializeMetadata;
17use vortex_array::SerializeMetadata;
18use vortex_array::expr::stats::Stat;
19use vortex_array::stats::as_stat_bitset_bytes;
20use vortex_array::stats::stats_from_bitset_bytes;
21use vortex_dtype::DType;
22use vortex_dtype::TryFromBytes;
23use vortex_error::VortexExpect;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_panic;
27use vortex_session::VortexSession;
28
29use crate::LayoutChildType;
30use crate::LayoutEncodingRef;
31use crate::LayoutId;
32use crate::LayoutReaderRef;
33use crate::LayoutRef;
34use crate::VTable;
35use crate::children::LayoutChildren;
36use crate::children::OwnedLayoutChildren;
37use crate::layouts::zoned::reader::ZonedReader;
38use crate::layouts::zoned::zone_map::ZoneMap;
39use crate::segments::SegmentId;
40use crate::segments::SegmentSource;
41use crate::vtable;
42
// Declares the `Zoned` layout through the crate-local `vtable!` macro.
// NOTE(review): presumably this is what introduces `ZonedVTable`, which is implemented
// below — confirm against the macro definition.
vtable!(Zoned);
44
45impl VTable for ZonedVTable {
46    type Layout = ZonedLayout;
47    type Encoding = ZonedLayoutEncoding;
48    type Metadata = ZonedMetadata;
49
50    fn id(_encoding: &Self::Encoding) -> LayoutId {
51        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
52    }
53
54    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
55        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
56    }
57
58    fn row_count(layout: &Self::Layout) -> u64 {
59        layout.children.child_row_count(0)
60    }
61
62    fn dtype(layout: &Self::Layout) -> &DType {
63        &layout.dtype
64    }
65
66    fn metadata(layout: &Self::Layout) -> Self::Metadata {
67        ZonedMetadata {
68            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
69            present_stats: layout.present_stats.clone(),
70        }
71    }
72
73    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
74        vec![]
75    }
76
77    fn nchildren(_layout: &Self::Layout) -> usize {
78        2
79    }
80
81    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
82        match idx {
83            0 => layout.children.child(0, layout.dtype()),
84            1 => layout.children.child(
85                1,
86                &ZoneMap::dtype_for_stats_table(layout.dtype(), &layout.present_stats),
87            ),
88            _ => vortex_bail!("Invalid child index: {}", idx),
89        }
90    }
91
92    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
93        match idx {
94            0 => LayoutChildType::Transparent("data".into()),
95            1 => LayoutChildType::Auxiliary("zones".into()),
96            _ => vortex_panic!("Invalid child index: {}", idx),
97        }
98    }
99
100    fn new_reader(
101        layout: &Self::Layout,
102        name: Arc<str>,
103        segment_source: Arc<dyn SegmentSource>,
104        session: &VortexSession,
105    ) -> VortexResult<LayoutReaderRef> {
106        Ok(Arc::new(ZonedReader::try_new(
107            layout.clone(),
108            name,
109            segment_source,
110            session.clone(),
111        )?))
112    }
113
114    #[cfg(gpu_unstable)]
115    fn new_gpu_reader(
116        layout: &Self::Layout,
117        name: Arc<str>,
118        segment_source: Arc<dyn SegmentSource>,
119        ctx: Arc<cudarc::driver::CudaContext>,
120    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
121        // skip prunning and immediately return data child
122        layout
123            .children
124            .child(0, layout.dtype())?
125            .new_gpu_reader(name, segment_source, ctx)
126    }
127
128    fn build(
129        _encoding: &Self::Encoding,
130        dtype: &DType,
131        _row_count: u64,
132        metadata: &ZonedMetadata,
133        _segment_ids: Vec<SegmentId>,
134        children: &dyn LayoutChildren,
135        _ctx: ArrayContext,
136    ) -> VortexResult<Self::Layout> {
137        Ok(ZonedLayout {
138            dtype: dtype.clone(),
139            children: children.to_arc(),
140            zone_len: metadata.zone_len as usize,
141            present_stats: metadata.present_stats.clone(),
142        })
143    }
144}
145
/// Marker type used as the `Encoding` of the zoned layout's `VTable` implementation.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
148
149#[derive(Clone, Debug)]
150pub struct ZonedLayout {
151    dtype: DType,
152    children: Arc<dyn LayoutChildren>,
153    zone_len: usize,
154    present_stats: Arc<[Stat]>,
155}
156
157impl ZonedLayout {
158    pub fn new(
159        data: LayoutRef,
160        zones: LayoutRef,
161        zone_len: usize,
162        present_stats: Arc<[Stat]>,
163    ) -> Self {
164        if zone_len == 0 {
165            vortex_panic!("Zone length must be greater than 0");
166        }
167        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
168        if zones.dtype() != &expected_dtype {
169            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
170        }
171        Self {
172            dtype: data.dtype().clone(),
173            children: OwnedLayoutChildren::layout_children(vec![data, zones]),
174            zone_len,
175            present_stats,
176        }
177    }
178
179    pub fn nzones(&self) -> usize {
180        usize::try_from(self.children.child_row_count(1)).vortex_expect("Invalid number of zones")
181    }
182
183    /// Returns an array of stats that exist in the layout's data, must be sorted.
184    pub fn present_stats(&self) -> &Arc<[Stat]> {
185        &self.present_stats
186    }
187}
188
189#[derive(Debug, PartialEq, Eq, Clone)]
190pub struct ZonedMetadata {
191    pub(super) zone_len: u32,
192    pub(super) present_stats: Arc<[Stat]>,
193}
194
195impl DeserializeMetadata for ZonedMetadata {
196    type Output = Self;
197
198    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
199        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
200        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
201        Ok(Self {
202            zone_len,
203            present_stats,
204        })
205    }
206}
207
208impl SerializeMetadata for ZonedMetadata {
209    fn serialize(self) -> Vec<u8> {
210        let mut metadata = vec![];
211        // First, write the block size to the metadata.
212        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
213        // Then write the bit-set of statistics.
214        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
215        metadata
216    }
217}
218
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing metadata must reproduce the original value.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order are expected to come back sorted: same count, different
    /// order from the input.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };

        let bytes = unsorted.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(decoded.present_stats.len(), unsorted.present_stats.len());
        assert_ne!(decoded.present_stats, unsorted.present_stats);
    }
}