vortex_layout/layouts/zoned/
mod.rs

1mod reader;
2pub mod writer;
3pub mod zone_map;
4
5use std::collections::BTreeSet;
6use std::sync::Arc;
7
8use vortex_array::stats::{Stat, as_stat_bitset_bytes, stats_from_bitset_bytes};
9use vortex_array::{ArrayContext, DeserializeMetadata, SerializeMetadata};
10use vortex_dtype::{DType, FieldMask, TryFromBytes};
11use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
12
13use crate::children::LayoutChildren;
14use crate::layouts::zoned::reader::ZonedReader;
15use crate::layouts::zoned::zone_map::ZoneMap;
16use crate::segments::{SegmentId, SegmentSource};
17use crate::{
18    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
19};
20
// NOTE(review): presumably this macro generates the `ZonedVTable` type that the
// `impl VTable` below is written for — confirm against the `vtable!` definition.
vtable!(Zoned);
22
23impl VTable for ZonedVTable {
24    type Layout = ZonedLayout;
25    type Encoding = ZonedLayoutEncoding;
26    type Metadata = ZonedMetadata;
27
28    fn id(_encoding: &Self::Encoding) -> LayoutId {
29        LayoutId::new_ref("vortex.stats") // For legacy reasons, this is called stats
30    }
31
32    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
33        LayoutEncodingRef::new_ref(ZonedLayoutEncoding.as_ref())
34    }
35
36    fn row_count(layout: &Self::Layout) -> u64 {
37        layout.data.row_count()
38    }
39
40    fn dtype(layout: &Self::Layout) -> &DType {
41        layout.data.dtype()
42    }
43
44    fn metadata(layout: &Self::Layout) -> Self::Metadata {
45        ZonedMetadata {
46            zone_len: u32::try_from(layout.zone_len).vortex_expect("Invalid zone length"),
47            present_stats: layout.present_stats.clone(),
48        }
49    }
50
51    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
52        vec![]
53    }
54
55    fn nchildren(_layout: &Self::Layout) -> usize {
56        2
57    }
58
59    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
60        match idx {
61            0 => Ok(layout.data.clone()),
62            1 => Ok(layout.zones.clone()),
63            _ => vortex_bail!("Invalid child index: {}", idx),
64        }
65    }
66
67    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
68        match idx {
69            0 => LayoutChildType::Transparent("data".into()),
70            1 => LayoutChildType::Auxiliary("zones".into()),
71            _ => vortex_panic!("Invalid child index: {}", idx),
72        }
73    }
74
75    fn register_splits(
76        layout: &Self::Layout,
77        field_mask: &[FieldMask],
78        row_offset: u64,
79        splits: &mut BTreeSet<u64>,
80    ) -> VortexResult<()> {
81        layout.data.register_splits(field_mask, row_offset, splits)
82    }
83
84    fn new_reader(
85        layout: &Self::Layout,
86        name: &Arc<str>,
87        segment_source: &Arc<dyn SegmentSource>,
88        ctx: &ArrayContext,
89    ) -> VortexResult<LayoutReaderRef> {
90        Ok(Arc::new(ZonedReader::try_new(
91            layout.clone(),
92            name.clone(),
93            segment_source.clone(),
94            ctx.clone(),
95        )?))
96    }
97
98    fn build(
99        _encoding: &Self::Encoding,
100        dtype: &DType,
101        _row_count: u64,
102        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
103        _segment_ids: Vec<SegmentId>,
104        children: &dyn LayoutChildren,
105    ) -> VortexResult<Self::Layout> {
106        let data = children.child(0, dtype)?;
107
108        let zones_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &metadata.present_stats);
109        let zones = children.child(1, &zones_dtype)?;
110
111        Ok(ZonedLayout::new(
112            data,
113            zones,
114            metadata.zone_len as usize,
115            metadata.present_stats.clone(),
116        ))
117    }
118}
119
/// Unit type used as the `Encoding` associated type of the zoned layout's
/// `VTable` implementation.
#[derive(Debug)]
pub struct ZonedLayoutEncoding;
122
/// A layout that pairs a `data` child layout with a `zones` child layout.
///
/// The row count and dtype of the zoned layout are those of `data`.
#[derive(Clone, Debug)]
pub struct ZonedLayout {
    /// Child layout `0`; supplies the row count and dtype of this layout.
    data: LayoutRef,
    /// Child layout `1`; its dtype must equal the stats-table dtype derived from
    /// `data`'s dtype and `present_stats`.
    zones: LayoutRef,
    /// Zone length; serialized as a `u32` in the layout metadata.
    /// NOTE(review): presumably the number of rows per zone — confirm with the writer.
    zone_len: usize,
    /// Statistics used to derive the expected dtype of the `zones` child.
    present_stats: Arc<[Stat]>,
}
130
131impl ZonedLayout {
132    pub fn new(
133        data: LayoutRef,
134        zones: LayoutRef,
135        zone_len: usize,
136        present_stats: Arc<[Stat]>,
137    ) -> Self {
138        let expected_dtype = ZoneMap::dtype_for_stats_table(data.dtype(), &present_stats);
139        if zones.dtype() != &expected_dtype {
140            vortex_panic!("Invalid zone map layout: zones dtype does not match expected dtype");
141        }
142        Self {
143            data,
144            zones,
145            zone_len,
146            present_stats,
147        }
148    }
149
150    pub fn nzones(&self) -> usize {
151        usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
152    }
153
154    pub fn present_stats(&self) -> &Arc<[Stat]> {
155        &self.present_stats
156    }
157}
158
/// Metadata stored alongside a zoned layout.
///
/// Serialized as the little-endian bytes of `zone_len` followed by a bit-set
/// encoding of `present_stats`.
#[derive(Debug)]
pub struct ZonedMetadata {
    /// Zone length of the layout, narrowed to `u32` for serialization.
    pub(super) zone_len: u32,
    /// Statistics recorded for the layout; written as a bit-set.
    pub(super) present_stats: Arc<[Stat]>,
}
164
165impl DeserializeMetadata for ZonedMetadata {
166    type Output = Self;
167
168    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
169        let zone_len = u32::try_from_le_bytes(&metadata[0..4])?;
170        let present_stats: Arc<[Stat]> = stats_from_bitset_bytes(&metadata[4..]).into();
171        Ok(Self {
172            zone_len,
173            present_stats,
174        })
175    }
176}
177
178impl SerializeMetadata for ZonedMetadata {
179    fn serialize(self) -> Vec<u8> {
180        let mut metadata = vec![];
181        // First, write the block size to the metadata.
182        metadata.extend_from_slice(&self.zone_len.to_le_bytes());
183        // Then write the bit-set of statistics.
184        metadata.extend_from_slice(&as_stat_bitset_bytes(&self.present_stats));
185        metadata
186    }
187}