vortex_layout/layouts/dict/
mod.rs1mod reader;
5pub mod writer;
6
7use std::sync::Arc;
8
9use reader::DictReader;
10use vortex_array::ArrayContext;
11use vortex_array::DeserializeMetadata;
12use vortex_array::ProstMetadata;
13use vortex_dtype::DType;
14use vortex_dtype::Nullability;
15use vortex_dtype::PType;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_bail;
19use vortex_error::vortex_panic;
20use vortex_session::VortexSession;
21
22use crate::LayoutChildType;
23use crate::LayoutEncodingRef;
24use crate::LayoutId;
25use crate::LayoutReaderRef;
26use crate::LayoutRef;
27use crate::VTable;
28use crate::children::LayoutChildren;
29use crate::segments::SegmentId;
30use crate::segments::SegmentSource;
31use crate::vtable;
32
33vtable!(Dict);
34
35impl VTable for DictVTable {
36 type Layout = DictLayout;
37 type Encoding = DictLayoutEncoding;
38 type Metadata = ProstMetadata<DictLayoutMetadata>;
39
40 fn id(_encoding: &Self::Encoding) -> LayoutId {
41 LayoutId::new_ref("vortex.dict")
42 }
43
44 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
45 LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
46 }
47
48 fn row_count(layout: &Self::Layout) -> u64 {
49 layout.codes.row_count()
50 }
51
52 fn dtype(layout: &Self::Layout) -> &DType {
53 layout.values.dtype()
54 }
55
56 fn metadata(layout: &Self::Layout) -> Self::Metadata {
57 let mut metadata =
58 DictLayoutMetadata::new(PType::try_from(layout.codes.dtype()).vortex_expect("ptype"));
59 metadata.is_nullable_codes = Some(layout.codes.dtype().is_nullable());
60 metadata.all_values_referenced = Some(layout.all_values_referenced);
61 ProstMetadata(metadata)
62 }
63
64 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
65 vec![]
66 }
67
68 fn nchildren(_layout: &Self::Layout) -> usize {
69 2
70 }
71
72 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
73 match idx {
74 0 => Ok(layout.values.clone()),
75 1 => Ok(layout.codes.clone()),
76 _ => vortex_bail!("Unreachable child index: {}", idx),
77 }
78 }
79
80 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
81 match idx {
82 0 => LayoutChildType::Auxiliary("values".into()),
83 1 => LayoutChildType::Transparent("codes".into()),
84 _ => vortex_panic!("Unreachable child index: {}", idx),
85 }
86 }
87
88 fn new_reader(
89 layout: &Self::Layout,
90 name: Arc<str>,
91 segment_source: Arc<dyn SegmentSource>,
92 session: &VortexSession,
93 ) -> VortexResult<LayoutReaderRef> {
94 Ok(Arc::new(DictReader::try_new(
95 layout.clone(),
96 name,
97 segment_source,
98 session,
99 )?))
100 }
101
102 #[cfg(gpu_unstable)]
103 fn new_gpu_reader(
104 _layout: &Self::Layout,
105 _name: Arc<str>,
106 _segment_source: Arc<dyn SegmentSource>,
107 _ctx: Arc<cudarc::driver::CudaContext>,
108 ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
109 todo!()
110 }
111
112 fn build(
113 _encoding: &Self::Encoding,
114 dtype: &DType,
115 _row_count: u64,
116 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
117 _segment_ids: Vec<SegmentId>,
118 children: &dyn LayoutChildren,
119 _ctx: ArrayContext,
120 ) -> VortexResult<Self::Layout> {
121 let values = children.child(0, dtype)?;
122 let codes_nullable = metadata
123 .is_nullable_codes
124 .map(Nullability::from)
125 .unwrap_or_else(|| dtype.nullability());
129 let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?;
130 Ok(unsafe {
131 DictLayout::new(values, codes)
132 .set_all_values_referenced(metadata.all_values_referenced.unwrap_or(false))
133 })
134 }
135}
136
/// Unit type identifying the dict layout encoding.
///
/// It is used as the `Encoding` associated type of the `VTable` implementation for
/// `DictVTable`, whose layout id is `"vortex.dict"`.
#[derive(Debug)]
pub struct DictLayoutEncoding;
139
/// A layout composed of two child layouts: dictionary `values` and `codes`.
///
/// NOTE(review): presumably the codes are indices into the values, as in ordinary
/// dictionary encoding; confirm against the reader and writer.
#[derive(Clone, Debug)]
pub struct DictLayout {
    /// Child layout holding the dictionary values; the layout's dtype is taken from it.
    values: LayoutRef,
    /// Child layout holding the codes; the layout's row count is taken from it.
    codes: LayoutRef,
    /// Flag that starts out `false` and is persisted in the layout metadata.
    ///
    /// NOTE(review): presumably `true` asserts that every dictionary value is referenced
    /// by at least one code, and `false` means "unknown"; confirm with the consumers.
    all_values_referenced: bool,
}
149
150impl DictLayout {
151 pub(crate) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
152 Self {
153 values,
154 codes,
155 all_values_referenced: false,
156 }
157 }
158
159 pub unsafe fn set_all_values_referenced(mut self, all_values_referenced: bool) -> Self {
170 self.all_values_referenced = all_values_referenced;
171 self
172 }
173
174 pub fn has_all_values_referenced(&self) -> bool {
175 self.all_values_referenced
176 }
177}
178
/// Protobuf-encoded metadata persisted for a `DictLayout`.
#[derive(prost::Message)]
pub struct DictLayoutMetadata {
    /// Primitive type of the codes, stored as the raw `i32` of the `PType` enumeration.
    /// Read it through the generated `codes_ptype()` getter.
    #[prost(enumeration = "PType", tag = "1")]
    codes_ptype: i32,
    /// Whether the codes dtype is nullable. When absent, `build` falls back to the
    /// nullability of the layout's own dtype.
    #[prost(optional, bool, tag = "2")]
    is_nullable_codes: Option<bool>,
    /// Persisted value of the layout's `all_values_referenced` flag. When absent, `build`
    /// treats it as `false`.
    #[prost(optional, bool, tag = "3")]
    pub(crate) all_values_referenced: Option<bool>,
}
194
195impl DictLayoutMetadata {
196 pub fn new(codes_ptype: PType) -> Self {
197 let mut metadata = Self::default();
198 metadata.set_codes_ptype(codes_ptype);
199 metadata
200 }
201}