vortex_layout/layouts/dict/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5pub mod writer;
6
7use std::sync::Arc;
8
9use reader::DictReader;
10use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
11use vortex_dtype::{DType, PType};
12use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
13
14use crate::children::LayoutChildren;
15use crate::segments::{SegmentId, SegmentSource};
16use crate::{
17    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
18};
19
// Invokes the crate's `vtable!` macro for the `Dict` layout.
// NOTE(review): presumably this is what declares `DictVTable`, which this file
// implements `VTable` for but never defines itself; confirm against the macro.
vtable!(Dict);
21
22impl VTable for DictVTable {
23    type Layout = DictLayout;
24    type Encoding = DictLayoutEncoding;
25    type Metadata = ProstMetadata<DictLayoutMetadata>;
26
27    fn id(_encoding: &Self::Encoding) -> LayoutId {
28        LayoutId::new_ref("vortex.dict")
29    }
30
31    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
32        LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
33    }
34
35    fn row_count(layout: &Self::Layout) -> u64 {
36        layout.codes.row_count()
37    }
38
39    fn dtype(layout: &Self::Layout) -> &DType {
40        layout.values.dtype()
41    }
42
43    fn metadata(layout: &Self::Layout) -> Self::Metadata {
44        ProstMetadata(DictLayoutMetadata::new(
45            PType::try_from(layout.codes.dtype()).vortex_expect("ptype"),
46        ))
47    }
48
49    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
50        vec![]
51    }
52
53    fn nchildren(_layout: &Self::Layout) -> usize {
54        2
55    }
56
57    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
58        match idx {
59            0 => Ok(layout.values.clone()),
60            1 => Ok(layout.codes.clone()),
61            _ => vortex_bail!("Unreachable child index: {}", idx),
62        }
63    }
64
65    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
66        match idx {
67            0 => LayoutChildType::Auxiliary("values".into()),
68            1 => LayoutChildType::Transparent("codes".into()),
69            _ => vortex_panic!("Unreachable child index: {}", idx),
70        }
71    }
72
73    fn new_reader(
74        layout: &Self::Layout,
75        name: Arc<str>,
76        segment_source: Arc<dyn SegmentSource>,
77    ) -> VortexResult<LayoutReaderRef> {
78        Ok(Arc::new(DictReader::try_new(
79            layout.clone(),
80            name,
81            segment_source,
82        )?))
83    }
84
85    fn build(
86        _encoding: &Self::Encoding,
87        dtype: &DType,
88        _row_count: u64,
89        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
90        _segment_ids: Vec<SegmentId>,
91        children: &dyn LayoutChildren,
92        _ctx: ArrayContext,
93    ) -> VortexResult<Self::Layout> {
94        let values = children.child(0, dtype)?;
95        let codes = children.child(
96            1,
97            &DType::Primitive(metadata.codes_ptype(), dtype.nullability()),
98        )?;
99        Ok(DictLayout { values, codes })
100    }
101}
102
/// Field-less marker type used as the `Encoding` associated type of the
/// `VTable` implementation for `DictVTable`.
#[derive(Debug)]
pub struct DictLayoutEncoding;
105
/// Layout made of two child layouts, `values` and `codes`.
///
/// NOTE(review): presumably `codes` are indices into `values`, as in a
/// dictionary encoding; confirm against the reader/writer modules.
#[derive(Clone, Debug)]
pub struct DictLayout {
    // Child layout whose dtype is reported as the dtype of the whole layout.
    values: LayoutRef,
    // Child layout whose row count is reported as the row count of the whole
    // layout; its dtype is converted to a `PType` when metadata is produced.
    codes: LayoutRef,
}
111
112impl DictLayout {
113    pub(super) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
114        Self { values, codes }
115    }
116}
117
/// Serialized metadata of the dict layout, derived as a `prost::Message`.
///
/// It carries a single field: the `PType` of the codes child.
#[derive(prost::Message)]
pub struct DictLayoutMetadata {
    #[prost(enumeration = "PType", tag = "1")]
    // Stored as a raw `i32` because prost requires that representation for
    // enumeration fields. Read it through the generated `codes_ptype()` getter
    // and write it through `set_codes_ptype()` rather than touching it directly.
    codes_ptype: i32,
}
124
125impl DictLayoutMetadata {
126    pub fn new(codes_ptype: PType) -> Self {
127        let mut metadata = Self::default();
128        metadata.set_codes_ptype(codes_ptype);
129        metadata
130    }
131}