vortex_layout/layouts/dict/
mod.rs1mod reader;
2pub mod writer;
3
4use std::collections::BTreeSet;
5use std::sync::Arc;
6
7use reader::DictReader;
8use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
9use vortex_dtype::{DType, FieldMask, PType};
10use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
11
12use crate::children::LayoutChildren;
13use crate::segments::{SegmentId, SegmentSource};
14use crate::{
15 LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
16};
17
// NOTE(review): presumably this macro generates the `DictVTable` marker type
// (and related glue) that the `impl VTable for DictVTable` below relies on —
// confirm against the `vtable!` macro definition in the crate root.
vtable!(Dict);
19
20impl VTable for DictVTable {
21 type Layout = DictLayout;
22 type Encoding = DictLayoutEncoding;
23 type Metadata = ProstMetadata<DictLayoutMetadata>;
24
25 fn id(_encoding: &Self::Encoding) -> LayoutId {
26 LayoutId::new_ref("vortex.dict")
27 }
28
29 fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
30 LayoutEncodingRef::new_ref(DictLayoutEncoding.as_ref())
31 }
32
33 fn row_count(layout: &Self::Layout) -> u64 {
34 layout.codes.row_count()
35 }
36
37 fn dtype(layout: &Self::Layout) -> &DType {
38 layout.values.dtype()
39 }
40
41 fn metadata(layout: &Self::Layout) -> Self::Metadata {
42 ProstMetadata(DictLayoutMetadata::new(
43 PType::try_from(layout.codes.dtype()).vortex_expect("ptype"),
44 ))
45 }
46
47 fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
48 vec![]
49 }
50
51 fn nchildren(_layout: &Self::Layout) -> usize {
52 2
53 }
54
55 fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
56 match idx {
57 0 => Ok(layout.values.clone()),
58 1 => Ok(layout.codes.clone()),
59 _ => vortex_bail!("Unreachable child index: {}", idx),
60 }
61 }
62
63 fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
64 match idx {
65 0 => LayoutChildType::Auxiliary("values".into()),
66 1 => LayoutChildType::Transparent("codes".into()),
67 _ => vortex_panic!("Unreachable child index: {}", idx),
68 }
69 }
70
71 fn register_splits(
72 layout: &Self::Layout,
73 field_mask: &[FieldMask],
74 row_offset: u64,
75 splits: &mut BTreeSet<u64>,
76 ) -> VortexResult<()> {
77 layout.codes.register_splits(field_mask, row_offset, splits)
78 }
79
80 fn new_reader(
81 layout: &Self::Layout,
82 name: &Arc<str>,
83 segment_source: &Arc<dyn SegmentSource>,
84 ctx: &ArrayContext,
85 ) -> VortexResult<LayoutReaderRef> {
86 Ok(Arc::new(DictReader::try_new(
87 layout.clone(),
88 name.clone(),
89 segment_source,
90 ctx,
91 )?))
92 }
93
94 fn build(
95 _encoding: &Self::Encoding,
96 dtype: &DType,
97 _row_count: u64,
98 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
99 _segment_ids: Vec<SegmentId>,
100 children: &dyn LayoutChildren,
101 ) -> VortexResult<Self::Layout> {
102 let values = children.child(0, dtype)?;
103 let codes = children.child(
104 1,
105 &DType::Primitive(metadata.codes_ptype(), dtype.nullability()),
106 )?;
107 Ok(DictLayout { values, codes })
108 }
109}
110
/// Marker type for the dictionary layout encoding.
///
/// It is a unit struct and therefore carries no state of its own.
#[derive(Debug)]
pub struct DictLayoutEncoding;
113
114#[derive(Clone, Debug)]
115pub struct DictLayout {
116 values: LayoutRef,
117 codes: LayoutRef,
118}
119
120impl DictLayout {
121 pub(super) fn new(values: LayoutRef, codes: LayoutRef) -> Self {
122 Self { values, codes }
123 }
124}
125
126#[derive(prost::Message)]
127pub struct DictLayoutMetadata {
128 #[prost(enumeration = "PType", tag = "1")]
129 codes_ptype: i32,
131}
132
133impl DictLayoutMetadata {
134 pub fn new(codes_ptype: PType) -> Self {
135 let mut metadata = Self::default();
136 metadata.set_codes_ptype(codes_ptype);
137 metadata
138 }
139}