1use fsst::{Decompressor, Symbol};
2use vortex_array::arrays::VarBinArray;
3use vortex_array::stats::{ArrayStats, StatsSetRef};
4use vortex_array::vtable::{
5 ArrayVTable, NotSupported, VTable, ValidityChild, ValidityVTableFromChild,
6};
7use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, vtable};
8use vortex_buffer::Buffer;
9use vortex_dtype::DType;
10use vortex_error::{VortexResult, vortex_bail};
11
// NOTE(review): `vtable!` comes from `vortex_array`; it presumably generates the
// `FSSTVTable` marker type (used below but not declared in this file) together with
// the conversion glue between `FSSTArray` / `FSSTEncoding` and the dynamic array and
// encoding traits — confirm against the macro definition.
vtable!(FSST);
13
14impl VTable for FSSTVTable {
15 type Array = FSSTArray;
16 type Encoding = FSSTEncoding;
17
18 type ArrayVTable = Self;
19 type CanonicalVTable = Self;
20 type OperationsVTable = Self;
21 type ValidityVTable = ValidityVTableFromChild;
22 type VisitorVTable = Self;
23 type ComputeVTable = NotSupported;
24 type EncodeVTable = Self;
25 type SerdeVTable = Self;
26
27 fn id(_encoding: &Self::Encoding) -> EncodingId {
28 EncodingId::new_ref("vortex.fsst")
29 }
30
31 fn encoding(_array: &Self::Array) -> EncodingRef {
32 EncodingRef::new_ref(FSSTEncoding.as_ref())
33 }
34}
35
36#[derive(Clone, Debug)]
37pub struct FSSTArray {
38 dtype: DType,
39 symbols: Buffer<Symbol>,
40 symbol_lengths: Buffer<u8>,
41 codes: VarBinArray,
42 uncompressed_lengths: ArrayRef,
44 stats_set: ArrayStats,
45}
46
/// Unit type standing for the FSST encoding; `FSSTVTable` uses it as its
/// `Encoding` type and hands out references to it from `encoding()`.
#[derive(Debug, Clone)]
pub struct FSSTEncoding;
49
50impl FSSTArray {
51 pub fn try_new(
60 dtype: DType,
61 symbols: Buffer<Symbol>,
62 symbol_lengths: Buffer<u8>,
63 codes: VarBinArray,
64 uncompressed_lengths: ArrayRef,
65 ) -> VortexResult<Self> {
66 if symbols.len() > 255 {
68 vortex_bail!(InvalidArgument: "symbols array must have length <= 255");
69 }
70 if symbols.len() != symbol_lengths.len() {
71 vortex_bail!(InvalidArgument: "symbols and symbol_lengths arrays must have same length");
72 }
73
74 if uncompressed_lengths.len() != codes.len() {
75 vortex_bail!(InvalidArgument: "uncompressed_lengths must be same len as codes");
76 }
77
78 if !uncompressed_lengths.dtype().is_int() || uncompressed_lengths.dtype().is_nullable() {
79 vortex_bail!(InvalidArgument: "uncompressed_lengths must have integer type and cannot be nullable, found {}", uncompressed_lengths.dtype());
80 }
81
82 if !matches!(codes.dtype(), DType::Binary(_)) {
84 vortex_bail!(InvalidArgument: "codes array must be DType::Binary type");
85 }
86
87 Ok(Self {
88 dtype,
89 symbols,
90 symbol_lengths,
91 codes,
92 uncompressed_lengths,
93 stats_set: Default::default(),
94 })
95 }
96
97 pub fn symbols(&self) -> &Buffer<Symbol> {
99 &self.symbols
100 }
101
102 pub fn symbol_lengths(&self) -> &Buffer<u8> {
104 &self.symbol_lengths
105 }
106
107 pub fn codes(&self) -> &VarBinArray {
109 &self.codes
110 }
111
112 #[inline]
114 pub fn codes_dtype(&self) -> &DType {
115 self.codes.dtype()
116 }
117
118 pub fn uncompressed_lengths(&self) -> &ArrayRef {
120 &self.uncompressed_lengths
121 }
122
123 #[inline]
125 pub fn uncompressed_lengths_dtype(&self) -> &DType {
126 self.uncompressed_lengths.dtype()
127 }
128
129 pub(crate) fn decompressor(&self) -> Decompressor {
134 Decompressor::new(self.symbols().as_slice(), self.symbol_lengths().as_slice())
135 }
136}
137
138impl ArrayVTable<FSSTVTable> for FSSTVTable {
139 fn len(array: &FSSTArray) -> usize {
140 array.codes().len()
141 }
142
143 fn dtype(array: &FSSTArray) -> &DType {
144 &array.dtype
145 }
146
147 fn stats(array: &FSSTArray) -> StatsSetRef<'_> {
148 array.stats_set.to_ref(array.as_ref())
149 }
150}
151
152impl ValidityChild<FSSTVTable> for FSSTVTable {
153 fn validity_child(array: &FSSTArray) -> &dyn Array {
154 array.codes().as_ref()
155 }
156}