vortex_layout/layouts/dict/
mod.rs1mod reader;
5pub mod writer;
6
7use std::sync::Arc;
8
9use reader::DictReader;
10use vortex_array::ArrayContext;
11use vortex_array::DeserializeMetadata;
12use vortex_array::ProstMetadata;
13use vortex_dtype::DType;
14use vortex_dtype::Nullability;
15use vortex_dtype::PType;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_bail;
19use vortex_error::vortex_ensure;
20use vortex_error::vortex_err;
21use vortex_error::vortex_panic;
22use vortex_session::VortexSession;
23
24use crate::LayoutChildType;
25use crate::LayoutEncodingRef;
26use crate::LayoutId;
27use crate::LayoutReaderRef;
28use crate::LayoutRef;
29use crate::VTable;
30use crate::children::LayoutChildren;
31use crate::segments::SegmentId;
32use crate::segments::SegmentSource;
33use crate::vtable;
34
// Invokes the crate's `vtable!` macro for the `Dict` layout.
// NOTE(review): presumably this generates the `DictVTable` marker type used in the
// `impl VTable for DictVTable` in this file, plus the glue tying `DictLayout` and
// `DictLayoutEncoding` to the crate's layout trait objects — confirm against the macro
// definition in the crate root.
vtable!(Dict);
36
37impl VTable for DictVTable {
38 type Layout = DictLayout;
39 type Encoding = DictLayoutEncoding;
40 type Metadata = ProstMetadata<DictLayoutMetadata>;
41
42 fn id(_encoding: &Self::Encoding) -> LayoutId {
43 LayoutId::new_ref("vortex.dict")
44 }
45
46 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
47 LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
48 }
49
50 fn row_count(layout: &Self::Layout) -> u64 {
51 layout.codes.row_count()
52 }
53
54 fn dtype(layout: &Self::Layout) -> &DType {
55 layout.values.dtype()
56 }
57
58 fn metadata(layout: &Self::Layout) -> Self::Metadata {
59 let mut metadata =
60 DictLayoutMetadata::new(PType::try_from(layout.codes.dtype()).vortex_expect("ptype"));
61 metadata.is_nullable_codes = Some(layout.codes.dtype().is_nullable());
62 metadata.all_values_referenced = Some(layout.all_values_referenced);
63 ProstMetadata(metadata)
64 }
65
66 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
67 vec![]
68 }
69
70 fn nchildren(_layout: &Self::Layout) -> usize {
71 2
72 }
73
74 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
75 match idx {
76 0 => Ok(layout.values.clone()),
77 1 => Ok(layout.codes.clone()),
78 _ => vortex_bail!("Unreachable child index: {}", idx),
79 }
80 }
81
82 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
83 match idx {
84 0 => LayoutChildType::Auxiliary("values".into()),
85 1 => LayoutChildType::Transparent("codes".into()),
86 _ => vortex_panic!("Unreachable child index: {}", idx),
87 }
88 }
89
90 fn new_reader(
91 layout: &Self::Layout,
92 name: Arc<str>,
93 segment_source: Arc<dyn SegmentSource>,
94 session: &VortexSession,
95 ) -> VortexResult<LayoutReaderRef> {
96 Ok(Arc::new(DictReader::try_new(
97 layout.clone(),
98 name,
99 segment_source,
100 session.clone(),
101 )?))
102 }
103
104 #[cfg(gpu_unstable)]
105 fn new_gpu_reader(
106 _layout: &Self::Layout,
107 _name: Arc<str>,
108 _segment_source: Arc<dyn SegmentSource>,
109 _ctx: Arc<cudarc::driver::CudaContext>,
110 ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
111 todo!()
112 }
113
114 fn build(
115 _encoding: &Self::Encoding,
116 dtype: &DType,
117 _row_count: u64,
118 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
119 _segment_ids: Vec<SegmentId>,
120 children: &dyn LayoutChildren,
121 _ctx: ArrayContext,
122 ) -> VortexResult<Self::Layout> {
123 let values = children.child(0, dtype)?;
124 let codes_nullable = metadata
125 .is_nullable_codes
126 .map(Nullability::from)
127 .unwrap_or_else(|| dtype.nullability());
131 let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?;
132 Ok(unsafe {
133 DictLayout::new(values, codes)
134 .set_all_values_referenced(metadata.all_values_referenced.unwrap_or(false))
135 })
136 }
137
138 fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
139 vortex_ensure!(
140 children.len() == 2,
141 "DictLayout expects exactly 2 children (values, codes), got {}",
142 children.len()
143 );
144 let mut children_iter = children.into_iter();
145 layout.values = children_iter
146 .next()
147 .ok_or_else(|| vortex_err!("Missing values child"))?;
148 layout.codes = children_iter
149 .next()
150 .ok_or_else(|| vortex_err!("Missing codes child"))?;
151 Ok(())
152 }
153}
154
/// Stateless unit type used as the `Encoding` associated type of the dictionary
/// layout's `VTable` implementation.
#[derive(Debug)]
pub struct DictLayoutEncoding;
157
/// A layout composed of a `values` child layout and a `codes` child layout.
///
/// The layout's dtype is the dtype of `values`, and its row count is the row count of
/// `codes` (see the `VTable` implementation in this file).
#[derive(Clone, Debug)]
pub struct DictLayout {
    /// Child layout holding the dictionary values (child index `0`).
    values: LayoutRef,
    /// Child layout holding the codes (child index `1`).
    codes: LayoutRef,
    /// Flag set through `set_all_values_referenced`; `new` initializes it to `false`.
    /// NOTE(review): presumably `true` means every dictionary value is referenced by at
    /// least one code, and `false` means "unknown" — confirm with the writer.
    all_values_referenced: bool,
}
167
168impl DictLayout {
169 pub(crate) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
170 Self {
171 values,
172 codes,
173 all_values_referenced: false,
174 }
175 }
176
177 pub unsafe fn set_all_values_referenced(mut self, all_values_referenced: bool) -> Self {
188 self.all_values_referenced = all_values_referenced;
189 self
190 }
191
192 pub fn has_all_values_referenced(&self) -> bool {
193 self.all_values_referenced
194 }
195}
196
/// Protobuf-encoded metadata stored alongside a dictionary layout.
#[derive(prost::Message)]
pub struct DictLayoutMetadata {
    /// Primitive type of the codes, stored as the `i32` representation of the `PType`
    /// prost enumeration (tag 1). Written via `set_codes_ptype` and read via
    /// `codes_ptype()` elsewhere in this file.
    #[prost(enumeration = "PType", tag = "1")]
    codes_ptype: i32,
    /// Whether the codes dtype is nullable (tag 2, optional). When absent, the layout's
    /// `build` falls back to the nullability of the layout dtype.
    #[prost(optional, bool, tag = "2")]
    is_nullable_codes: Option<bool>,
    /// Value of the layout's `all_values_referenced` flag (tag 3, optional). When absent,
    /// the layout's `build` treats it as `false`.
    #[prost(optional, bool, tag = "3")]
    pub(crate) all_values_referenced: Option<bool>,
}
212
213impl DictLayoutMetadata {
214 pub fn new(codes_ptype: PType) -> Self {
215 let mut metadata = Self::default();
216 metadata.set_codes_ptype(codes_ptype);
217 metadata
218 }
219}