pancake_db_core/encoding/
mod.rs

1use pancake_db_idl::dml::FieldValue;
2use pancake_db_idl::dtype::DataType;
3use q_compress::data_types::TimestampMicros;
4
5pub use decoder::ByteIdx;
6pub use decoder::Decodable;
7pub use decoder::Decoder;
8pub use decoder::DecoderImpl;
9pub use encoder::Encoder;
10pub use encoder::EncoderImpl;
11
12use crate::primitives::Primitive;
13
14mod byte_reader;
15mod decoder;
16mod encoder;
17
// Reserved single-byte markers, occupying the top three `u8` values.
// NOTE(review): their use sites are not in this file; presumably the
// `encoder` and `decoder` submodules emit and interpret them. The bytes test
// in this file treats 253..=255 inside payloads as needing escaping.
const ESCAPE_BYTE: u8 = 0xFF;
const COUNT_BYTE: u8 = 0xFE;
const NULL_BYTE: u8 = 0xFD;
21
22fn encoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Encoder> {
23  Box::new(EncoderImpl::<P>::new(nested_list_depth))
24}
25
26fn field_value_decoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Decoder<FieldValue>> {
27  Box::new(DecoderImpl::<P, FieldValue>::new(nested_list_depth))
28}
29
30fn byte_idx_decoder_for<P: Primitive>(nested_list_depth: u8) -> Box<dyn Decoder<ByteIdx>> {
31  Box::new(DecoderImpl::<P, ByteIdx>::new(nested_list_depth))
32}
33
34pub fn new_encoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Encoder> {
35  match dtype {
36    DataType::Int64 => encoder_for::<i64>(nested_list_depth),
37    DataType::String => encoder_for::<String>(nested_list_depth),
38    DataType::Float32 => encoder_for::<f32>(nested_list_depth),
39    DataType::Float64 => encoder_for::<f64>(nested_list_depth),
40    DataType::Bytes => encoder_for::<Vec<u8>>(nested_list_depth),
41    DataType::Bool => encoder_for::<bool>(nested_list_depth),
42    DataType::TimestampMicros => encoder_for::<TimestampMicros>(nested_list_depth),
43  }
44}
45
46pub fn new_field_value_decoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Decoder<FieldValue>> {
47  match dtype {
48    DataType::Int64 => field_value_decoder_for::<i64>(nested_list_depth),
49    DataType::String => field_value_decoder_for::<String>(nested_list_depth),
50    DataType::Float32 => field_value_decoder_for::<f32>(nested_list_depth),
51    DataType::Float64 => field_value_decoder_for::<f64>(nested_list_depth),
52    DataType::Bytes => field_value_decoder_for::<Vec<u8>>(nested_list_depth),
53    DataType::Bool => field_value_decoder_for::<bool>(nested_list_depth),
54    DataType::TimestampMicros => field_value_decoder_for::<TimestampMicros>(nested_list_depth),
55  }
56}
57
58pub fn new_byte_idx_decoder(dtype: DataType, nested_list_depth: u8) -> Box<dyn Decoder<ByteIdx>> {
59  match dtype {
60    DataType::Int64 => byte_idx_decoder_for::<i64>(nested_list_depth),
61    DataType::String => byte_idx_decoder_for::<String>(nested_list_depth),
62    DataType::Float32 => byte_idx_decoder_for::<f32>(nested_list_depth),
63    DataType::Float64 => byte_idx_decoder_for::<f64>(nested_list_depth),
64    DataType::Bytes => byte_idx_decoder_for::<Vec<u8>>(nested_list_depth),
65    DataType::Bool => byte_idx_decoder_for::<bool>(nested_list_depth),
66    DataType::TimestampMicros => byte_idx_decoder_for::<TimestampMicros>(nested_list_depth),
67  }
68}
69
#[cfg(test)]
mod tests {
  //! Round-trip tests for the encoders and decoders defined in this module.

  use pancake_db_idl::dml::{FieldValue, RepeatedFieldValue};
  use pancake_db_idl::dml::field_value::Value;

  use crate::errors::CoreResult;
  use crate::primitives::Primitive;
  use crate::rep_levels::RepLevelsAndAtoms;

  use super::*;

  /// Wraps `l` in a `Value::ListVal`, giving each element its own non-null
  /// `FieldValue`.
  fn build_list_val(l: Vec<Value>) -> Value {
    // `l` is owned, so its elements are moved into place instead of cloned.
    let vals = l.into_iter()
      .map(|value| FieldValue { value: Some(value) })
      .collect();
    Value::ListVal(RepeatedFieldValue { vals })
  }

  /// Encodes `fvs` with an `EncoderImpl<P>` built for `escape_depth`.
  fn encode<P: Primitive>(fvs: &[FieldValue], escape_depth: u8) -> CoreResult<Vec<u8>> {
    EncoderImpl::<P>::new(escape_depth).encode(fvs)
  }

  /// Decodes `encoded` into `FieldValue`s with a `DecoderImpl<P, FieldValue>`
  /// built for `escape_depth`.
  fn decode<P: Primitive>(encoded: &[u8], escape_depth: u8) -> CoreResult<Vec<FieldValue>> {
    DecoderImpl::<P, FieldValue>::new(escape_depth).decode(encoded)
  }

  /// Encodes `fvs` and immediately decodes the resulting bytes again.
  fn round_trip<P: Primitive>(fvs: &[FieldValue], escape_depth: u8) -> CoreResult<Vec<FieldValue>> {
    let encoded = encode::<P>(fvs, escape_depth)?;
    decode::<P>(&encoded, escape_depth)
  }

  /// Returns the elements of a list value, panicking on any other variant.
  fn list_elems(value: &Value) -> &[FieldValue] {
    match value {
      Value::ListVal(RepeatedFieldValue { vals }) => vals.as_slice(),
      _ => panic!("expected a list value"),
    }
  }

  #[test]
  fn test_bytess() -> CoreResult<()> {
    let bytess: Vec<Option<Vec<u8>>> = vec![
      Some(vec![0_u8, 255, 255, 254, 253]), // some bytes that need escaping
      None,
      Some(vec![]),
      Some(vec![77_u8; 2081]),
    ];

    let values = bytess.iter()
      .map(|maybe_bytes| FieldValue {
        value: maybe_bytes.clone().map(Value::BytesVal),
      })
      .collect::<Vec<FieldValue>>();

    let decoded = round_trip::<Vec<u8>>(&values, 0)?;
    let recovered = decoded.iter()
      .map(|fv| fv.value.as_ref().map(|v| match v {
        Value::BytesVal(b) => b.clone(),
        _ => panic!("expected a bytes value"),
      }))
      .collect::<Vec<Option<Vec<u8>>>>();

    assert_eq!(recovered, bytess);
    Ok(())
  }

  #[test]
  fn test_ints() -> CoreResult<()> {
    let ints: Vec<Option<i64>> = vec![
      Some(i64::MIN),
      Some(i64::MAX),
      None,
      Some(0),
      Some(-1),
    ];

    let values = ints.iter()
      .map(|maybe_x| FieldValue {
        value: maybe_x.map(Value::Int64Val),
      })
      .collect::<Vec<FieldValue>>();

    let decoded = round_trip::<i64>(&values, 0)?;
    let recovered = decoded.iter()
      .map(|fv| fv.value.as_ref().map(|v| match v {
        Value::Int64Val(x) => *x,
        _ => panic!("expected an int64 value"),
      }))
      .collect::<Vec<Option<i64>>>();

    assert_eq!(recovered, ints);
    Ok(())
  }

  #[test]
  fn test_nested_strings() -> CoreResult<()> {
    let strings: Vec<Option<Vec<Vec<String>>>> = vec![
      Some(vec![
        vec!["azAZ09﹝ツツツ﹞ꗽꗼ".to_string(), "abc".to_string()],
        vec!["/\\''!@#$%^&*()".to_string()],
      ]),
      None,
      Some(vec![
        vec!["".to_string()],
        vec!["z".repeat(2)],
        vec!["null".to_string()],
      ]),
      Some(vec![vec![]]),
      Some(vec![]),
    ];

    let values = strings.iter()
      .map(|maybe_x| FieldValue {
        value: maybe_x.as_ref().map(|outer| build_list_val(
          outer.iter()
            .map(|inner| build_list_val(
              inner.iter().cloned().map(Value::StringVal).collect()
            ))
            .collect()
        )),
      })
      .collect::<Vec<FieldValue>>();

    let decoded = round_trip::<String>(&values, 2)?;
    let recovered = decoded.iter()
      .map(|fv| fv.value.as_ref().map(|outer| {
        list_elems(outer).iter()
          .map(|inner_fv| {
            let inner = inner_fv.value.as_ref()
              .expect("inner list must not be null");
            list_elems(inner).iter()
              .map(|leaf| match leaf.value.as_ref().expect("string must not be null") {
                Value::StringVal(s) => s.to_string(),
                _ => panic!("expected a string value"),
              })
              .collect::<Vec<String>>()
          })
          .collect::<Vec<Vec<String>>>()
      }))
      .collect::<Vec<Option<Vec<Vec<String>>>>>();

    assert_eq!(recovered, strings);
    Ok(())
  }

  #[test]
  fn test_decode_rep_levels() -> CoreResult<()> {
    let strings: Vec<Option<Vec<String>>> = vec![
      Some(vec![
        "abc".to_string(),
        "de".to_string(),
      ]),
      None,
      Some(vec![
        "f".to_string(),
      ]),
      Some(vec!["".to_string()]),
      Some(vec![]),
    ];

    let values = strings.iter()
      .map(|maybe_x| FieldValue {
        value: maybe_x.as_ref().map(|x0| build_list_val(
          x0.iter().cloned().map(Value::StringVal).collect()
        )),
      })
      .collect::<Vec<FieldValue>>();

    let encoded = encode::<String>(&values, 1)?;
    let decoder = DecoderImpl::<String, RepLevelsAndAtoms<u8>>::new(1);
    let decoded = decoder.decode(&encoded)?;
    let mut combined = RepLevelsAndAtoms::default();
    for x in &decoded {
      combined.extend(x);
    }
    // Reading the expected levels against the input: each string byte yields
    // a 3, the end of a string a 2, the end of a list a 1, and a null row a 0.
    assert_eq!(
      combined.levels,
      vec![
        3, 3, 3, 2, // "abc"
        3, 3, 2, 1, // "de", then end of the first list
        0,          // null row
        3, 2, 1,    // "f", then end of list
        2, 1,       // "", then end of list
        1,          // empty list
      ]
    );
    assert_eq!(
      combined.atoms,
      vec![97_u8, 98, 99, 100, 101, 102] // a through f
    );
    Ok(())
  }
}
253}