vortex_layout/layouts/zoned/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::{MAX_IS_TRUNCATED, MIN_IS_TRUNCATED, lower_bound, upper_bound};
12use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
13use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
14use vortex_dtype::{DType, TryFromBytes};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
16
17use crate::children::LayoutChildren;
18use crate::layouts::zoned::reader::ZonedReader;
19use crate::layouts::zoned::zone_map::ZoneMap;
20use crate::segments::{SegmentId, SegmentSource};
21use crate::{
22    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
23};
24
// NOTE(review): presumably declares `ZonedVTable` (implemented below) plus the glue that
// ties it to `ZonedLayout` / `ZonedLayoutEncoding` — confirm against the `vtable!` macro.
25vtable!(Zoned);
26
27impl VTable for ZonedVTable {
28    type Layout = ZonedLayout;
29    type Encoding = ZonedLayoutEncoding;
30    type Metadata = ZonedMetadata;
31
32    fn id(_encoding: &Self::Encoding) -> LayoutId {
33        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
34    }
35
36    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
37        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
38    }
39
40    fn row_count(layout: &Self::Layout) -> u64 {
41        layout.data.row_count()
42    }
43
44    fn dtype(layout: &Self::Layout) -> &DType {
45        layout.data.dtype()
46    }
47
48    fn metadata(layout: &Self::Layout) -> Self::Metadata {
49        ZonedMetadata {
50            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
51            present_stats: layout.present_stats.clone(),
52        }
53    }
54
55    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
56        vec![]
57    }
58
59    fn nchildren(_layout: &Self::Layout) -> usize {
60        2
61    }
62
63    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
64        match idx {
65            0 => Ok(layout.data.clone()),
66            1 => Ok(layout.zones.clone()),
67            _ => vortex_bail!("Invalid child index: {}", idx),
68        }
69    }
70
71    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
72        match idx {
73            0 => LayoutChildType::Transparent("data".into()),
74            1 => LayoutChildType::Auxiliary("zones".into()),
75            _ => vortex_panic!("Invalid child index: {}", idx),
76        }
77    }
78
79    fn new_reader(
80        layout: &Self::Layout,
81        name: Arc<str>,
82        segment_source: Arc<dyn SegmentSource>,
83    ) -> VortexResult<LayoutReaderRef> {
84        Ok(Arc::new(ZonedReader::try_new(
85            layout.clone(),
86            name,
87            segment_source,
88        )?))
89    }
90
91    fn build(
92        _encoding: &Self::Encoding,
93        dtype: &DType,
94        _row_count: u64,
95        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96        _segment_ids: Vec<SegmentId>,
97        children: &dyn LayoutChildren,
98        _ctx: ArrayContext,
99    ) -> VortexResult<Self::Layout> {
100        let data = children.child(0, dtype)?;
101
102        let zones_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &metadata.present_stats);
103        let zones = children.child(1, &zones_dtype)?;
104
105        Ok(ZonedLayout::new(
106            data,
107            zones,
108            metadata.zone_len as usize,
109            metadata.present_stats.clone(),
110        ))
111    }
112}
113
/// Field-less encoding type for the zoned layout; it is the `Encoding` associated type of
/// the `VTable` implementation for `ZonedVTable`.
114#[derive(Debug)]
115pub struct ZonedLayoutEncoding;
116
/// A layout that pairs a data layout with a "zones" layout holding a statistics table.
///
/// The layout's row count and dtype are those of the data child.
117#[derive(Clone, Debug)]
118pub struct ZonedLayout {
    /// Child 0 (`"data"`, transparent): the layout holding the actual data.
119    data: LayoutRef,
    /// Child 1 (`"zones"`, auxiliary): the statistics table. Its dtype must equal
    /// `ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats)` and its row count is
    /// the number of zones.
120    zones: LayoutRef,
    /// Zone length; must be greater than 0 and is serialized as a `u32`.
    /// NOTE(review): presumably the number of data rows covered by each zone — confirm
    /// against the writer.
121    zone_len: usize,
    /// The statistics recorded in the zones table.
122    present_stats: Arc<[Stat]>,
123}
124
125impl ZonedLayout {
126    pub fn new(
127        data: LayoutRef,
128        zones: LayoutRef,
129        zone_len: usize,
130        present_stats: Arc<[Stat]>,
131    ) -> Self {
132        if zone_len == 0 {
133            vortex_panic!("Zone length must be greater than 0");
134        }
135        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
136        if zones.dtype() != &expected_dtype {
137            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
138        }
139        Self {
140            data,
141            zones,
142            zone_len,
143            present_stats,
144        }
145    }
146
147    pub fn nzones(&self) -> usize {
148        usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
149    }
150
151    /// Returns an array of stats that exist in the layout's data, must be sorted.
152    pub fn present_stats(&self) -> &Arc<[Stat]> {
153        &self.present_stats
154    }
155}
156
/// Metadata of a zoned layout.
///
/// Its byte form is the zone length as four little-endian bytes followed by the bit-set
/// of present statistics.
157#[derive(Debug, PartialEq, Eq, Clone)]
158pub struct ZonedMetadata {
    /// Zone length; occupies the first four bytes (little-endian) of the serialized form.
159    pub(super) zone_len: u32,
    /// Statistics present in the zones table; serialized as a bit-set after the zone length.
160    pub(super) present_stats: Arc<[Stat]>,
161}
162
163impl DeserializeMetadata for ZonedMetadata {
164    type Output = Self;
165
166    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
167        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
168        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
169        Ok(Self {
170            zone_len,
171            present_stats,
172        })
173    }
174}
175
176impl SerializeMetadata for ZonedMetadata {
177    fn serialize(self) -> Vec<u8> {
178        let mut metadata = vec![];
179        // First, write the block size to the metadata.
180        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
181        // Then write the bit-set of statistics.
182        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
183        metadata
184    }
185}
186
#[cfg(test)]
mod tests {

    use rstest::rstest;

    use super::*;

    /// Serializing and then deserializing each case must yield an equal value.
    #[rstest]
    #[case(ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([]),
        })]
    #[case(ZonedMetadata {
            zone_len: 0,
            present_stats: Arc::new([Stat::IsConstant]),
        })]
    #[case::all_sorted(ZonedMetadata {
            zone_len: 314,
            present_stats: Arc::new([
                Stat::IsConstant,
                Stat::IsSorted,
                Stat::IsStrictSorted,
                Stat::Max,
                Stat::Min,
                Stat::Sum,
                Stat::NullCount,
                Stat::UncompressedSizeInBytes,
                Stat::NaNCount,
            ]),
        })]
    #[case::some_sorted(ZonedMetadata {
            zone_len: 314,
            present_stats: Arc::new([
                Stat::IsSorted,
                Stat::IsStrictSorted,
                Stat::Max,
                Stat::Min,
                Stat::Sum,
                Stat::NullCount,
                Stat::UncompressedSizeInBytes,
                Stat::NaNCount,
            ]),
        })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats given out of order come back sorted: same count, different sequence.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let original = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };
        let bytes = original.clone().serialize();
        let decoded = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(decoded.present_stats.is_sorted());
        assert_eq!(decoded.present_stats.len(), original.present_stats.len());
        assert_ne!(decoded.present_stats, original.present_stats);
    }
}
232}