vortex_layout/layouts/dict/
mod.rs

1mod reader;
2pub mod writer;
3
4use std::collections::BTreeSet;
5use std::sync::Arc;
6
7use reader::DictReader;
8use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
9use vortex_dtype::{DType, FieldMask, PType};
10use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
11
12use crate::children::LayoutChildren;
13use crate::segments::{SegmentId, SegmentSource};
14use crate::{
15    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
16};
17
18vtable!(Dict);
19
20impl VTable for DictVTable {
21    type Layout = DictLayout;
22    type Encoding = DictLayoutEncoding;
23    type Metadata = ProstMetadata<DictLayoutMetadata>;
24
25    fn id(_encoding: &Self::Encoding) -> LayoutId {
26        LayoutId::new_ref("vortex.dict")
27    }
28
29    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
30        LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
31    }
32
33    fn row_count(layout: &Self::Layout) -> u64 {
34        layout.codes.row_count()
35    }
36
37    fn dtype(layout: &Self::Layout) -> &DType {
38        layout.values.dtype()
39    }
40
41    fn metadata(layout: &Self::Layout) -> Self::Metadata {
42        ProstMetadata(DictLayoutMetadata::new(
43            PType::try_from(layout.codes.dtype()).vortex_expect("ptype"),
44        ))
45    }
46
47    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
48        vec![]
49    }
50
51    fn nchildren(_layout: &Self::Layout) -> usize {
52        2
53    }
54
55    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
56        match idx {
57            0 => Ok(layout.values.clone()),
58            1 => Ok(layout.codes.clone()),
59            _ => vortex_bail!("Unreachable child index: {}", idx),
60        }
61    }
62
63    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
64        match idx {
65            0 => LayoutChildType::Auxiliary("values".into()),
66            1 => LayoutChildType::Transparent("codes".into()),
67            _ => vortex_panic!("Unreachable child index: {}", idx),
68        }
69    }
70
71    fn register_splits(
72        layout: &Self::Layout,
73        field_mask: &[FieldMask],
74        row_offset: u64,
75        splits: &mut BTreeSet<u64>,
76    ) -> VortexResult<()> {
77        layout.codes.register_splits(field_mask, row_offset, splits)
78    }
79
80    fn new_reader(
81        layout: &Self::Layout,
82        name: &Arc<str>,
83        segment_source: &Arc<dyn SegmentSource>,
84        ctx: &ArrayContext,
85    ) -> VortexResult<LayoutReaderRef> {
86        Ok(Arc::new(DictReader::try_new(
87            layout.clone(),
88            name.clone(),
89            segment_source,
90            ctx,
91        )?))
92    }
93
94    fn build(
95        _encoding: &Self::Encoding,
96        dtype: &DType,
97        _row_count: u64,
98        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
99        _segment_ids: Vec<SegmentId>,
100        children: &dyn LayoutChildren,
101    ) -> VortexResult<Self::Layout> {
102        let values = children.child(0, dtype)?;
103        let codes = children.child(
104            1,
105            &DType::Primitive(metadata.codes_ptype(), dtype.nullability()),
106        )?;
107        Ok(DictLayout { values, codes })
108    }
109}
110
/// Stateless marker type used as the `Encoding` of the dict layout vtable.
#[derive(Debug)]
pub struct DictLayoutEncoding;
113
114#[derive(Clone, Debug)]
115pub struct DictLayout {
116    values: LayoutRef,
117    codes: LayoutRef,
118}
119
120impl DictLayout {
121    pub(super) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
122        Self { values, codes }
123    }
124}
125
126#[derive(prost::Message)]
127pub struct DictLayoutMetadata {
128    #[prost(enumeration = "PType", tag = "1")]
129    // i32 is required for proto, use the generated getter to read this field.
130    codes_ptype: i32,
131}
132
133impl DictLayoutMetadata {
134    pub fn new(codes_ptype: PType) -> Self {
135        let mut metadata = Self::default();
136        metadata.set_codes_ptype(codes_ptype);
137        metadata
138    }
139}