vortex_layout/layouts/dict/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5pub mod writer;
6
7use std::sync::Arc;
8
9use reader::DictReader;
10use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
11use vortex_dtype::{DType, Nullability, PType};
12use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
13
14use crate::children::LayoutChildren;
15use crate::segments::{SegmentId, SegmentSource};
16use crate::{
17    LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
18};
19
20vtable!(Dict);
21
22impl VTable for DictVTable {
23    type Layout = DictLayout;
24    type Encoding = DictLayoutEncoding;
25    type Metadata = ProstMetadata<DictLayoutMetadata>;
26
27    fn id(_encoding: &Self::Encoding) -> LayoutId {
28        LayoutId::new_ref("vortex.dict")
29    }
30
31    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
32        LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
33    }
34
35    fn row_count(layout: &Self::Layout) -> u64 {
36        layout.codes.row_count()
37    }
38
39    fn dtype(layout: &Self::Layout) -> &DType {
40        layout.values.dtype()
41    }
42
43    fn metadata(layout: &Self::Layout) -> Self::Metadata {
44        let mut metadata =
45            DictLayoutMetadata::new(PType::try_from(layout.codes.dtype()).vortex_expect("ptype"));
46        metadata.is_nullable_codes = Some(layout.codes.dtype().is_nullable());
47        ProstMetadata(metadata)
48    }
49
50    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
51        vec![]
52    }
53
54    fn nchildren(_layout: &Self::Layout) -> usize {
55        2
56    }
57
58    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
59        match idx {
60            0 => Ok(layout.values.clone()),
61            1 => Ok(layout.codes.clone()),
62            _ => vortex_bail!("Unreachable child index: {}", idx),
63        }
64    }
65
66    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
67        match idx {
68            0 => LayoutChildType::Auxiliary("values".into()),
69            1 => LayoutChildType::Transparent("codes".into()),
70            _ => vortex_panic!("Unreachable child index: {}", idx),
71        }
72    }
73
74    fn new_reader(
75        layout: &Self::Layout,
76        name: Arc<str>,
77        segment_source: Arc<dyn SegmentSource>,
78    ) -> VortexResult<LayoutReaderRef> {
79        Ok(Arc::new(DictReader::try_new(
80            layout.clone(),
81            name,
82            segment_source,
83        )?))
84    }
85
86    #[cfg(gpu_unstable)]
87    fn new_gpu_reader(
88        _layout: &Self::Layout,
89        _name: Arc<str>,
90        _segment_source: Arc<dyn SegmentSource>,
91        _ctx: Arc<cudarc::driver::CudaContext>,
92    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
93        todo!()
94    }
95
96    fn build(
97        _encoding: &Self::Encoding,
98        dtype: &DType,
99        _row_count: u64,
100        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
101        _segment_ids: Vec<SegmentId>,
102        children: &dyn LayoutChildren,
103        _ctx: ArrayContext,
104    ) -> VortexResult<Self::Layout> {
105        let values = children.child(0, dtype)?;
106        let codes_nullable = metadata
107            .is_nullable_codes
108            .map(Nullability::from)
109            // The old behaviour (without `is_nullable_codes` metadata) used the nullability
110            // of the values (and whole array).
111            // see [`SerdeVTable<DictVTable>::build`].
112            .unwrap_or_else(|| dtype.nullability());
113        let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?;
114        Ok(DictLayout { values, codes })
115    }
116}
117
/// Stateless encoding type of the dict layout; used as the `Encoding` associated type of
/// the `VTable` implementation for `DictVTable`.
#[derive(Debug)]
pub struct DictLayoutEncoding;
120
/// A layout composed of a `values` child and a `codes` child.
#[derive(Clone, Debug)]
pub struct DictLayout {
    /// Child layout holding the values; it determines the dtype of this layout.
    values: LayoutRef,
    /// Child layout holding the codes; it determines the row count of this layout.
    codes: LayoutRef,
}
126
127impl DictLayout {
128    pub(super) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
129        Self { values, codes }
130    }
131}
132
/// Protobuf-encoded metadata stored alongside a dict layout.
#[derive(prost::Message)]
pub struct DictLayoutMetadata {
    /// Primitive type of the codes.
    ///
    /// i32 is required for proto, use the generated getter to read this field.
    #[prost(enumeration = "PType", tag = "1")]
    codes_ptype: i32,
    /// Whether the codes are nullable.
    ///
    /// Nullable codes are optional since they were added after stabilisation; when the
    /// field is absent, the layout's own nullability is used instead when building.
    #[prost(optional, bool, tag = "2")]
    is_nullable_codes: Option<bool>,
}
142
143impl DictLayoutMetadata {
144    pub fn new(codes_ptype: PType) -> Self {
145        let mut metadata = Self::default();
146        metadata.set_codes_ptype(codes_ptype);
147        metadata
148    }
149}