vortex_layout/layouts/zoned/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod builder;
5mod reader;
6pub mod writer;
7pub mod zone_map;
8
9use std::sync::Arc;
10
11pub use builder::{MAX_IS_TRUNCATED, MIN_IS_TRUNCATED, lower_bound, upper_bound};
12use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
13use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
14use vortex_dtype::{DType, TryFromBytes};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
16
17use crate::children::LayoutChildren;
18use crate::layouts::zoned::reader::ZonedReader;
19use crate::layouts::zoned::zone_map::ZoneMap;
20use crate::segments::{SegmentId, SegmentSource};
21use crate::{
22    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
23};
24
25vtable!(Zoned);
26
27impl VTable for ZonedVTable {
28    type Layout = ZonedLayout;
29    type Encoding = ZonedLayoutEncoding;
30    type Metadata = ZonedMetadata;
31
32    fn id(_encoding: &Self::Encoding) -> LayoutId {
33        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
34    }
35
36    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
37        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
38    }
39
40    fn row_count(layout: &Self::Layout) -> u64 {
41        layout.data.row_count()
42    }
43
44    fn dtype(layout: &Self::Layout) -> &DType {
45        layout.data.dtype()
46    }
47
48    fn metadata(layout: &Self::Layout) -> Self::Metadata {
49        ZonedMetadata {
50            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
51            present_stats: layout.present_stats.clone(),
52        }
53    }
54
55    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
56        vec![]
57    }
58
59    fn nchildren(_layout: &Self::Layout) -> usize {
60        2
61    }
62
63    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
64        match idx {
65            0 => Ok(layout.data.clone()),
66            1 => Ok(layout.zones.clone()),
67            _ => vortex_bail!("Invalid child index: {}", idx),
68        }
69    }
70
71    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
72        match idx {
73            0 => LayoutChildType::Transparent("data".into()),
74            1 => LayoutChildType::Auxiliary("zones".into()),
75            _ => vortex_panic!("Invalid child index: {}", idx),
76        }
77    }
78
79    fn new_reader(
80        layout: &Self::Layout,
81        name: Arc<str>,
82        segment_source: Arc<dyn SegmentSource>,
83    ) -> VortexResult<LayoutReaderRef> {
84        Ok(Arc::new(ZonedReader::try_new(
85            layout.clone(),
86            name,
87            segment_source,
88        )?))
89    }
90
91    fn build(
92        _encoding: &Self::Encoding,
93        dtype: &DType,
94        _row_count: u64,
95        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
96        _segment_ids: Vec<SegmentId>,
97        children: &dyn LayoutChildren,
98        _ctx: ArrayContext,
99    ) -> VortexResult<Self::Layout> {
100        let data = children.child(0, dtype)?;
101
102        let zones_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &metadata.present_stats);
103        let zones = children.child(1, &zones_dtype)?;
104
105        Ok(ZonedLayout::new(
106            data,
107            zones,
108            metadata.zone_len as usize,
109            metadata.present_stats.clone(),
110        ))
111    }
112}
113
/// Marker type for the zoned layout encoding; it is the `Encoding` associated type of
/// the `ZonedVTable` implementation in this module.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
116
117#[derive(Clone, Debug)]
118pub struct ZonedLayout {
119    data: LayoutRef,
120    zones: LayoutRef,
121    zone_len: usize,
122    present_stats: Arc<[Stat]>,
123}
124
125impl ZonedLayout {
126    pub fn new(
127        data: LayoutRef,
128        zones: LayoutRef,
129        zone_len: usize,
130        present_stats: Arc<[Stat]>,
131    ) -> Self {
132        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
133        if zones.dtype() != &expected_dtype {
134            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
135        }
136        Self {
137            data,
138            zones,
139            zone_len,
140            present_stats,
141        }
142    }
143
144    pub fn nzones(&self) -> usize {
145        usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
146    }
147
148    /// Returns an array of stats that exist in the layout's data, must be sorted.
149    pub fn present_stats(&self) -> &Arc<[Stat]> {
150        &self.present_stats
151    }
152}
153
154#[derive(Debug, PartialEq, Eq, Clone)]
155pub struct ZonedMetadata {
156    pub(super) zone_len: u32,
157    pub(super) present_stats: Arc<[Stat]>,
158}
159
160impl DeserializeMetadata for ZonedMetadata {
161    type Output = Self;
162
163    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
164        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
165        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
166        Ok(Self {
167            zone_len,
168            present_stats,
169        })
170    }
171}
172
173impl SerializeMetadata for ZonedMetadata {
174    fn serialize(self) -> Vec<u8> {
175        let mut metadata = vec![];
176        // First, write the block size to the metadata.
177        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
178        // Then write the bit-set of statistics.
179        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
180        metadata
181    }
182}
183
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Metadata whose stats are listed in sorted order survives a
    /// serialize/deserialize round trip unchanged.
    #[rstest]
    #[case(ZonedMetadata {
        zone_len: u32::MAX,
        present_stats: Arc::new([]),
    })]
    #[case(ZonedMetadata {
        zone_len: 0,
        present_stats: Arc::new([Stat::IsConstant]),
    })]
    #[case::all_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsConstant,
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    #[case::some_sorted(ZonedMetadata {
        zone_len: 314,
        present_stats: Arc::new([
            Stat::IsSorted,
            Stat::IsStrictSorted,
            Stat::Max,
            Stat::Min,
            Stat::Sum,
            Stat::NullCount,
            Stat::UncompressedSizeInBytes,
            Stat::NaNCount,
        ]),
    })]
    fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
        let bytes = metadata.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();
        assert_eq!(round_tripped, metadata);
    }

    /// Stats supplied out of order come back sorted after a round trip: the result has
    /// the same number of stats but is not equal to the unsorted input.
    #[test]
    fn test_deserialize_unsorted_stats() {
        let unsorted = ZonedMetadata {
            zone_len: u32::MAX,
            present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
        };

        let bytes = unsorted.clone().serialize();
        let round_tripped = ZonedMetadata::deserialize(&bytes).unwrap();

        assert!(round_tripped.present_stats.is_sorted());
        assert_eq!(
            round_tripped.present_stats.len(),
            unsorted.present_stats.len()
        );
        assert_ne!(round_tripped.present_stats, unsorted.present_stats);
    }
}