pancake_db_core/encoding/
mod.rs1use pancake_db_idl::dml::FieldValue;
2use pancake_db_idl::dtype::DataType;
3use q_compress::data_types::TimestampMicros;
4
5pub use decoder::ByteIdx;
6pub use decoder::Decodable;
7pub use decoder::Decoder;
8pub use decoder::DecoderImpl;
9pub use encoder::Encoder;
10pub use encoder::EncoderImpl;
11
12use crate::primitives::Primitive;
13
14mod byte_reader;
15mod decoder;
16mod encoder;
17
// Byte values reserved by this encoding module. Their precise roles are
// defined by the encoder/decoder submodules, which are not shown here; the
// names suggest an escape marker, a count marker, and a null marker
// respectively (NOTE(review): confirm against `encoder`/`decoder`).
const ESCAPE_BYTE: u8 = 0xFF;
const COUNT_BYTE: u8 = 0xFE;
const NULL_BYTE: u8 = 0xFD;
21
22fn encoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Encoder> {
23 Box::new(EncoderImpl::<P>::new(nested_list_depth))
24}
25
26fn field_value_decoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Decoder<FieldValue>> {
27 Box::new(DecoderImpl::<P, FieldValue>::new(nested_list_depth))
28}
29
30fn byte_idx_decoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Decoder<ByteIdx>> {
31 Box::new(DecoderImpl::<P, ByteIdx>::new(nested_list_depth))
32}
33
34pub fn new_encoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Encoder> {
35 match dtype {
36 DataType::Int64 => encoder_for::<i64>(nested_list_depth),
37 DataType::String => encoder_for::<String>(nested_list_depth),
38 DataType::Float32 => encoder_for::<f32>(nested_list_depth),
39 DataType::Float64 => encoder_for::<f64>(nested_list_depth),
40 DataType::Bytes => encoder_for::<Vec<u8>>(nested_list_depth),
41 DataType::Bool => encoder_for::<bool>(nested_list_depth),
42 DataType::TimestampMicros => encoder_for::<TimestampMicros>(nested_list_depth),
43 }
44}
45
46pub fn new_field_value_decoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Decoder<FieldValue>> {
47 match dtype {
48 DataType::Int64 => field_value_decoder_for::<i64>(nested_list_depth),
49 DataType::String => field_value_decoder_for::<String>(nested_list_depth),
50 DataType::Float32 => field_value_decoder_for::<f32>(nested_list_depth),
51 DataType::Float64 => field_value_decoder_for::<f64>(nested_list_depth),
52 DataType::Bytes => field_value_decoder_for::<Vec<u8>>(nested_list_depth),
53 DataType::Bool => field_value_decoder_for::<bool>(nested_list_depth),
54 DataType::TimestampMicros => field_value_decoder_for::<TimestampMicros>(nested_list_depth),
55 }
56}
57
58pub fn new_byte_idx_decoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Decoder<ByteIdx>> {
59 match dtype {
60 DataType::Int64 => byte_idx_decoder_for::<i64>(nested_list_depth),
61 DataType::String => byte_idx_decoder_for::<String>(nested_list_depth),
62 DataType::Float32 => byte_idx_decoder_for::<f32>(nested_list_depth),
63 DataType::Float64 => byte_idx_decoder_for::<f64>(nested_list_depth),
64 DataType::Bytes => byte_idx_decoder_for::<Vec<u8>>(nested_list_depth),
65 DataType::Bool => byte_idx_decoder_for::<bool>(nested_list_depth),
66 DataType::TimestampMicros => byte_idx_decoder_for::<TimestampMicros>(nested_list_depth),
67 }
68}
69
#[cfg(test)]
mod tests {
    //! Round-trip tests: values are encoded with `EncoderImpl` and decoded
    //! again with `DecoderImpl`, then compared against the original input.

    use pancake_db_idl::dml::field_value::Value;
    use pancake_db_idl::dml::{FieldValue, RepeatedFieldValue};

    use crate::errors::CoreResult;
    use crate::primitives::Primitive;
    use crate::rep_levels::RepLevelsAndAtoms;

    use super::*;

    /// Wraps every value in a `FieldValue` and bundles them into a
    /// `Value::ListVal`.
    fn build_list_val(l: Vec<Value>) -> Value {
        // The vector is owned, so move each element instead of cloning it.
        let vals = l
            .into_iter()
            .map(|value| FieldValue { value: Some(value) })
            .collect();
        Value::ListVal(RepeatedFieldValue { vals })
    }

    /// Returns the elements of a list value; panics on any other variant.
    fn list_elems(value: &Value) -> &[FieldValue] {
        match value {
            Value::ListVal(RepeatedFieldValue { vals }) => vals.as_slice(),
            _ => panic!("expected a list value"),
        }
    }

    /// Returns the value held by a list element; panics if it is absent.
    fn elem_value(fv: &FieldValue) -> &Value {
        fv.value
            .as_ref()
            .expect("list elements are expected to hold a value")
    }

    /// Encodes `fvs` as primitive `P` at the given nesting depth.
    fn encode<P: Primitive>(fvs: &[FieldValue], nested_list_depth: u8) -> CoreResult<Vec<u8>> {
        EncoderImpl::<P>::new(nested_list_depth).encode(fvs)
    }

    /// Decodes `encoded` as primitive `P` at the given nesting depth.
    fn decode<P: Primitive>(encoded: &[u8], nested_list_depth: u8) -> CoreResult<Vec<FieldValue>> {
        DecoderImpl::<P, FieldValue>::new(nested_list_depth).decode(encoded)
    }

    #[test]
    fn test_bytess() -> CoreResult<()> {
        // The first entry deliberately contains the reserved byte values
        // 255, 254 and 253.
        let bytess = vec![
            Some(vec![0_u8, 255, 255, 254, 253]),
            None,
            Some(vec![]),
            Some(vec![77; 2081]),
        ];

        let values = bytess
            .iter()
            .map(|maybe_bytes| FieldValue {
                value: maybe_bytes.clone().map(Value::BytesVal),
            })
            .collect::<Vec<FieldValue>>();

        let encoded = encode::<Vec<u8>>(&values, 0)?;
        let decoded = decode::<Vec<u8>>(&encoded, 0)?;
        let recovered = decoded
            .iter()
            .map(|fv| match &fv.value {
                Some(Value::BytesVal(b)) => Some(b.clone()),
                Some(_) => panic!("expected a bytes value"),
                None => None,
            })
            .collect::<Vec<Option<Vec<u8>>>>();

        assert_eq!(recovered, bytess);
        Ok(())
    }

    #[test]
    fn test_ints() -> CoreResult<()> {
        let ints: Vec<Option<i64>> = vec![
            Some(i64::MIN),
            Some(i64::MAX),
            None,
            Some(0),
            Some(-1),
        ];

        let values = ints
            .iter()
            .map(|maybe_x| FieldValue {
                value: maybe_x.map(Value::Int64Val),
            })
            .collect::<Vec<FieldValue>>();

        let encoded = encode::<i64>(&values, 0)?;
        let decoded = decode::<i64>(&encoded, 0)?;
        let recovered = decoded
            .iter()
            .map(|fv| match &fv.value {
                Some(Value::Int64Val(x)) => Some(*x),
                Some(_) => panic!("expected an int64 value"),
                None => None,
            })
            .collect::<Vec<Option<i64>>>();

        assert_eq!(recovered, ints);
        Ok(())
    }

    #[test]
    fn test_nested_strings() -> CoreResult<()> {
        let strings = vec![
            Some(vec![
                vec!["azAZ09﹝ツツツ﹞ꗽꗼ".to_string(), "abc".to_string()],
                vec!["/\\''!@#$%^&*()".to_string()],
            ]),
            None,
            Some(vec![
                vec!["".to_string()],
                vec!["z".repeat(2)],
                vec!["null".to_string()],
            ]),
            Some(vec![vec![]]),
            Some(vec![]),
        ];

        let values = strings
            .iter()
            .map(|maybe_lists| FieldValue {
                value: maybe_lists.as_ref().map(|outer| {
                    build_list_val(
                        outer
                            .iter()
                            .map(|inner| {
                                build_list_val(
                                    inner.iter().cloned().map(Value::StringVal).collect(),
                                )
                            })
                            .collect(),
                    )
                }),
            })
            .collect::<Vec<FieldValue>>();

        let encoded = encode::<String>(&values, 2)?;
        let decoded = decode::<String>(&encoded, 2)?;
        let recovered = decoded
            .iter()
            .map(|fv| {
                fv.value.as_ref().map(|outer| {
                    list_elems(outer)
                        .iter()
                        .map(|inner_fv| {
                            list_elems(elem_value(inner_fv))
                                .iter()
                                .map(|string_fv| match elem_value(string_fv) {
                                    Value::StringVal(s) => s.clone(),
                                    _ => panic!("expected a string value"),
                                })
                                .collect()
                        })
                        .collect()
                })
            })
            .collect::<Vec<Option<Vec<Vec<String>>>>>();

        assert_eq!(recovered, strings);
        Ok(())
    }

    #[test]
    fn test_decode_rep_levels() -> CoreResult<()> {
        let strings = vec![
            Some(vec!["abc".to_string(), "de".to_string()]),
            None,
            Some(vec!["f".to_string()]),
            Some(vec!["".to_string()]),
            Some(vec![]),
        ];

        let values = strings
            .iter()
            .map(|maybe_list| FieldValue {
                value: maybe_list.as_ref().map(|list| {
                    build_list_val(list.iter().cloned().map(Value::StringVal).collect())
                }),
            })
            .collect::<Vec<FieldValue>>();

        let encoded = encode::<String>(&values, 1)?;
        let decoder = DecoderImpl::<String, RepLevelsAndAtoms<u8>>::new(1);
        let decoded = decoder.decode(&encoded)?;

        // Concatenate the per-row results so levels and atoms can each be
        // checked as one flat sequence.
        let mut combined = RepLevelsAndAtoms::default();
        for x in &decoded {
            combined.extend(x);
        }

        // One group of levels per input row, in input order.
        assert_eq!(
            combined.levels,
            vec![
                3, 3, 3, 2, // "abc"
                3, 3, 2, 1, // "de", then end of first list
                0, // None
                3, 2, 1, // ["f"]
                2, 1, // [""]
                1, // []
            ]
        );
        // The atoms are the UTF-8 bytes of "abcdef".
        assert_eq!(combined.atoms, vec![97_u8, 98, 99, 100, 101, 102]);
        Ok(())
    }
}