1use zerocopy::IntoBytes;
8
9use crate::dtype::DType;
10use crate::layout::FillValue;
11use crate::timestamp::TimestampNs;
12
13pub trait ArrayElement: Clone + Send + Sync + 'static {
21 const DTYPE: DType;
23 fn encode_chunk(values: &[Self]) -> Vec<u8>;
25 fn decode_chunk(bytes: &[u8]) -> Vec<Self>;
28 fn fill_element(fill: Option<&FillValue>) -> Self;
32}
33
/// Copies the in-memory bytes of `values` into a freshly allocated `Vec<u8>`.
///
/// The bytes are taken exactly as the elements are laid out in memory, so the
/// output uses the host's native byte order. The callers in this file only
/// pass slices of primitive integers and floats.
fn encode_copy<T>(values: &[T]) -> Vec<u8> {
    let len = std::mem::size_of_val(values);
    let start = values.as_ptr().cast::<u8>();
    // SAFETY: `start` is the beginning of `values`, which stays valid for
    // reads of `size_of_val(values)` bytes for the duration of the borrow, and
    // `u8` has an alignment of 1. This also relies on `T` having no padding
    // bytes, which holds for the primitive numeric callers in this file.
    let raw = unsafe { std::slice::from_raw_parts(start, len) };
    raw.to_vec()
}
40
/// Reinterprets `bytes` as a sequence of `T` values and returns them in a new
/// `Vec<T>`.
///
/// Only whole elements are decoded: trailing bytes that do not make up a full
/// `T` are ignored. Empty input and zero-sized `T` both yield an empty vector.
/// The bytes are copied verbatim, so they are read in the host's native byte
/// order.
fn decode_copy<T>(bytes: &[u8]) -> Vec<T> {
    let width = std::mem::size_of::<T>();
    // `checked_div` is `None` for a zero-sized `T`, which decodes to nothing.
    let count = bytes.len().checked_div(width).unwrap_or(0);
    if count == 0 {
        return Vec::new();
    }

    let byte_len = count * width;
    let mut decoded: Vec<T> = Vec::with_capacity(count);
    // SAFETY: `decoded` owns an allocation with room for `count` elements
    // (`byte_len` bytes) that cannot overlap `bytes`, and `bytes` holds at
    // least `byte_len` readable bytes. Copying through `*mut u8` places no
    // alignment requirement on `bytes`. After the copy the first `count`
    // elements are initialized; this relies on every bit pattern being a valid
    // `T`, which holds for the primitive numeric callers in this file.
    unsafe {
        std::ptr::copy_nonoverlapping(bytes.as_ptr(), decoded.as_mut_ptr().cast::<u8>(), byte_len);
        decoded.set_len(count);
    }
    decoded
}
55
/// Implements [`ArrayElement`] for an unsigned integer type.
///
/// `$elem` is the Rust type and `$dtype` the `DType` value reported for it.
/// Chunks are encoded and decoded with `encode_copy` / `decode_copy`. The fill
/// element is derived from `UInt`, `Int`, `Float` and `Bool` fill values with
/// `as` casts; every other case, including `None`, yields `0`.
macro_rules! impl_element_uint {
    ($elem:ty, $dtype:expr) => {
        impl ArrayElement for $elem {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                match fill {
                    // `true` maps to 1 and `false` to 0.
                    Some(&FillValue::Bool(flag)) => flag as u8 as $elem,
                    Some(&FillValue::Float(x)) => x as $elem,
                    Some(&FillValue::Int(x)) => x as $elem,
                    Some(&FillValue::UInt(x)) => x as $elem,
                    _ => 0,
                }
            }
        }
    };
}
80
/// Implements [`ArrayElement`] for a signed integer type.
///
/// `$elem` is the Rust type and `$dtype` the `DType` value reported for it.
/// Chunks are encoded and decoded with `encode_copy` / `decode_copy`. The fill
/// element is derived from `Int`, `UInt` and `Float` fill values with `as`
/// casts; every other case, including `None`, yields `0`.
///
/// NOTE(review): unlike the unsigned variant, a `Bool` fill value is not
/// converted here and falls through to `0` — confirm this is intended.
macro_rules! impl_element_int {
    ($elem:ty, $dtype:expr) => {
        impl ArrayElement for $elem {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                match fill {
                    Some(&FillValue::Float(x)) => x as $elem,
                    Some(&FillValue::UInt(x)) => x as $elem,
                    Some(&FillValue::Int(x)) => x as $elem,
                    _ => 0,
                }
            }
        }
    };
}
102
/// Implements [`ArrayElement`] for a floating-point type.
///
/// `$elem` is the Rust type and `$dtype` the `DType` value reported for it.
/// Chunks are encoded and decoded with `encode_copy` / `decode_copy`. The fill
/// element is derived from `Float`, `Int` and `UInt` fill values with `as`
/// casts; every other case, including `None`, yields `0.0`.
macro_rules! impl_element_float {
    ($elem:ty, $dtype:expr) => {
        impl ArrayElement for $elem {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                match fill {
                    Some(&FillValue::UInt(x)) => x as $elem,
                    Some(&FillValue::Int(x)) => x as $elem,
                    Some(&FillValue::Float(x)) => x as $elem,
                    _ => 0.0,
                }
            }
        }
    };
}
124
125impl_element_uint!(u8, DType::UInt8);
126impl_element_uint!(u16, DType::UInt16);
127impl_element_uint!(u32, DType::UInt32);
128impl_element_uint!(u64, DType::UInt64);
129impl_element_int!(i8, DType::Int8);
130impl_element_int!(i16, DType::Int16);
131impl_element_int!(i32, DType::Int32);
132impl_element_int!(i64, DType::Int64);
133impl_element_float!(f32, DType::Float32);
134impl_element_float!(f64, DType::Float64);
135
136impl ArrayElement for TimestampNs {
139 const DTYPE: DType = DType::TimestampNs;
140
141 fn encode_chunk(values: &[Self]) -> Vec<u8> {
142 values.as_bytes().to_vec()
143 }
144
145 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
146 let elem = std::mem::size_of::<Self>();
147 let n = bytes.len() / elem;
148 let mut out = vec![Self(0); n];
149 if n > 0 {
150 out.as_mut_bytes().copy_from_slice(&bytes[..n * elem]);
151 }
152 out
153 }
154
155 fn fill_element(fill: Option<&FillValue>) -> Self {
156 match fill {
157 Some(FillValue::TimestampNs(v)) => Self(*v),
158 Some(FillValue::Int(v)) => Self(*v),
159 _ => Self(0),
160 }
161 }
162}
163
/// Encodes variable-length byte strings as an offset table followed by the
/// concatenated payload.
///
/// For `n` input slices the returned buffer contains:
///
/// * `n + 1` little-endian `u32` offsets, the first of which is `0`; offset
///   `i + 1` is the end of slice `i` within the payload,
/// * the bytes of all slices, back to back.
///
/// # Panics
///
/// Panics if the combined length of the slices does not fit in a `u32`. The
/// offsets could not describe such a payload, and truncating them would
/// silently produce a corrupt chunk.
fn encode_offsets<'a>(slices: impl Iterator<Item = &'a [u8]>) -> Vec<u8> {
    // The header precedes the payload, so the input has to be walked twice;
    // materialize the borrowed slices once (the bytes themselves are not copied).
    let slices: Vec<&[u8]> = slices.collect();

    let payload_len = slices
        .iter()
        .try_fold(0usize, |total, s| total.checked_add(s.len()))
        .filter(|&total| total <= u32::MAX as usize)
        .expect("variable-length chunk payload exceeds the u32 offset range");

    let mut data = Vec::with_capacity((slices.len() + 1) * 4 + payload_len);

    // Offset table: a leading 0 followed by the running end of each slice.
    let mut end = 0u32;
    data.extend_from_slice(&end.to_le_bytes());
    for s in &slices {
        // Lossless: the running total never exceeds `payload_len`, which was
        // checked against `u32::MAX` above.
        end += s.len() as u32;
        data.extend_from_slice(&end.to_le_bytes());
    }

    // Payload.
    for s in &slices {
        data.extend_from_slice(s);
    }
    data
}
184
/// Decodes a buffer made of a little-endian `u32` offset table followed by a
/// payload into the individual byte strings.
///
/// The number of values is not stored in the buffer. It is recovered by
/// scanning the offset table for the entry whose value, added to the size of
/// the table up to and including that entry, equals the total buffer length.
///
/// Input that is too short to hold any value decodes to an empty vector.
/// Truncated or corrupt input (an offset pointing past the payload, or
/// decreasing offsets) also decodes to an empty vector instead of panicking.
fn decode_offsets(bytes: &[u8]) -> Vec<Vec<u8>> {
    try_decode_offsets(bytes).unwrap_or_default()
}

/// Fallible core of `decode_offsets`; returns `None` when an offset pair does
/// not describe a valid range of the payload.
fn try_decode_offsets(bytes: &[u8]) -> Option<Vec<Vec<u8>>> {
    // Reads header entry `idx`, or `None` when it is not fully inside `bytes`.
    let offset_at = |idx: usize| -> Option<usize> {
        let pos = idx.checked_mul(4)?;
        let raw: [u8; 4] = bytes.get(pos..pos.checked_add(4)?)?.try_into().ok()?;
        Some(u32::from_le_bytes(raw) as usize)
    };

    // Count the values: stop at the entry that accounts for the whole buffer,
    // or when the buffer runs out of complete entries.
    let mut count = 0usize;
    while let Some(end) = offset_at(count + 1) {
        count += 1;
        let header_len = (count + 1) * 4;
        if header_len.checked_add(end) == Some(bytes.len()) {
            break;
        }
    }
    if count == 0 {
        return Some(Vec::new());
    }

    let payload = bytes.get((count + 1) * 4..)?;
    let offsets = (0..=count).map(offset_at).collect::<Option<Vec<usize>>>()?;

    // `get` rejects both out-of-range ends and `start > end`.
    offsets
        .windows(2)
        .map(|pair| payload.get(pair[0]..pair[1]).map(|value| value.to_vec()))
        .collect()
}
216
217impl ArrayElement for String {
220 const DTYPE: DType = DType::String;
221
222 fn encode_chunk(values: &[Self]) -> Vec<u8> {
223 encode_offsets(values.iter().map(|s| s.as_bytes()))
224 }
225
226 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
227 decode_offsets(bytes)
228 .into_iter()
229 .map(|b| String::from_utf8_lossy(&b).into_owned())
230 .collect()
231 }
232
233 fn fill_element(_fill: Option<&FillValue>) -> Self {
234 String::new()
235 }
236}
237
238impl ArrayElement for Vec<u8> {
241 const DTYPE: DType = DType::Binary;
242
243 fn encode_chunk(values: &[Self]) -> Vec<u8> {
244 encode_offsets(values.iter().map(|v| v.as_slice()))
245 }
246
247 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
248 decode_offsets(bytes)
249 }
250
251 fn fill_element(_fill: Option<&FillValue>) -> Self {
252 Vec::new()
253 }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Three `f32` values encode to 12 bytes and decode back unchanged.
    #[test]
    fn numeric_roundtrip_f32() {
        let input = vec![1.0f32, 2.5, 3.5];
        let encoded = f32::encode_chunk(&input);
        assert_eq!(encoded.len(), 12);
        assert_eq!(f32::decode_chunk(&encoded), input);
    }

    /// Signed values, including negatives and `i32::MAX`, survive a round trip.
    #[test]
    fn numeric_roundtrip_i32() {
        let input = vec![-1i32, 0, 42, i32::MAX];
        let decoded = i32::decode_chunk(&i32::encode_chunk(&input));
        assert_eq!(decoded, input);
    }

    /// Strings, including an empty one, survive a round trip.
    #[test]
    fn string_roundtrip() {
        let input: Vec<String> = ["hello", "", "world!"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let encoded = String::encode_chunk(&input);
        assert_eq!(String::decode_chunk(&encoded), input);
    }

    /// Byte strings, including an empty one, survive a round trip.
    #[test]
    fn binary_roundtrip() {
        let input: Vec<Vec<u8>> = vec![vec![1, 2, 3], vec![], vec![255]];
        let encoded = Vec::<u8>::encode_chunk(&input);
        assert_eq!(Vec::<u8>::decode_chunk(&encoded), input);
    }

    /// Numeric fill values map onto the element type; `None` yields zero.
    #[test]
    fn fill_element_numeric() {
        let int_fill = FillValue::Int(-7);
        assert_eq!(i32::fill_element(Some(&int_fill)), -7i32);

        let float_fill = FillValue::Float(1.5);
        assert_eq!(f64::fill_element(Some(&float_fill)), 1.5f64);

        assert_eq!(u8::fill_element(None), 0u8);
    }

    /// Variable-length element types ignore the fill value entirely.
    #[test]
    fn fill_element_vlen_ignores_fill() {
        let fill = FillValue::Int(99);
        assert_eq!(String::fill_element(Some(&fill)), "");
        assert_eq!(Vec::<u8>::fill_element(None), Vec::<u8>::new());
    }

    /// Empty input decodes to an empty vector for fixed- and variable-length types.
    #[test]
    fn decode_empty() {
        let no_bytes: &[u8] = &[];
        assert_eq!(i32::decode_chunk(no_bytes), Vec::<i32>::new());
        assert_eq!(String::decode_chunk(no_bytes), Vec::<String>::new());
    }

    /// Timestamps, including the `i64` extremes, use 8 bytes each and survive
    /// a round trip.
    #[test]
    fn timestamp_roundtrip() {
        let input = vec![
            TimestampNs(0),
            TimestampNs(1_700_000_000_000_000_000),
            TimestampNs(-1),
            TimestampNs(i64::MAX),
            TimestampNs(i64::MIN),
        ];
        let encoded = TimestampNs::encode_chunk(&input);
        assert_eq!(encoded.len(), input.len() * 8);
        assert_eq!(TimestampNs::decode_chunk(&encoded), input);
    }

    /// `TimestampNs` and `Int` fill values are wrapped; `None` yields zero.
    #[test]
    fn timestamp_fill_element() {
        let ts_fill = FillValue::TimestampNs(123);
        assert_eq!(TimestampNs::fill_element(Some(&ts_fill)), TimestampNs(123));

        let int_fill = FillValue::Int(7);
        assert_eq!(TimestampNs::fill_element(Some(&int_fill)), TimestampNs(7));

        assert_eq!(TimestampNs::fill_element(None), TimestampNs(0));
    }
}