1use zerocopy::IntoBytes;
2
3use crate::dtype::DType;
4use crate::layout::FillValue;
5use crate::timestamp::TimestampNs;
6
/// An element type that can be encoded into, and decoded from, the byte
/// representation of an array chunk.
///
/// This file implements the trait for the primitive integer and float types,
/// `TimestampNs`, `String` and `Vec<u8>`.
pub trait ArrayElement: Clone + Send + Sync + 'static {
    /// The `DType` tag associated with this element type.
    const DTYPE: DType;

    /// Serializes `values` into a single byte buffer.
    fn encode_chunk(values: &[Self]) -> Vec<u8>;

    /// Rebuilds elements from a byte buffer in the layout written by
    /// `encode_chunk`.
    fn decode_chunk(bytes: &[u8]) -> Vec<Self>;

    /// Produces an element from an optional fill value.
    ///
    /// The implementations in this file return a zero or empty value when
    /// `fill` is `None` or holds a variant they do not handle.
    fn fill_element(fill: Option<&FillValue>) -> Self;
}
27
/// Copies the in-memory representation of `values` into a fresh byte vector.
///
/// The bytes are taken verbatim from memory, so the output is in the host's
/// native byte order. Within this file the function is only instantiated for
/// primitive integer and float types.
fn encode_copy<T: Sized>(values: &[T]) -> Vec<u8> {
    let start = values.as_ptr() as *const u8;
    let total_bytes = values.len() * std::mem::size_of::<T>();
    // SAFETY: `start` and `total_bytes` describe exactly the memory owned by
    // the live `values` slice, and `u8` has no alignment requirement. Reading
    // the bytes is only sound for element types without padding, which holds
    // for the primitive numeric types this helper is used with.
    let view: &[u8] = unsafe { std::slice::from_raw_parts(start, total_bytes) };
    view.to_vec()
}
34
/// Reinterprets `bytes` as a packed sequence of `T` values in native byte
/// order and returns them as a vector.
///
/// Only whole elements are decoded: `bytes.len() / size_of::<T>()` values are
/// produced and any trailing partial element is ignored. An empty input or a
/// zero-sized `T` yields an empty vector.
///
/// Every bit pattern must be a valid `T`; within this file the function is
/// only instantiated for primitive integer and float types.
fn decode_copy<T: Sized>(bytes: &[u8]) -> Vec<T> {
    let width = std::mem::size_of::<T>();
    if width == 0 || bytes.is_empty() {
        return Vec::new();
    }
    bytes
        .chunks_exact(width)
        // SAFETY: each chunk is exactly `size_of::<T>()` readable bytes, and
        // `read_unaligned` places no alignment requirement on the source.
        .map(|raw| unsafe { std::ptr::read_unaligned(raw.as_ptr() as *const T) })
        .collect()
}
49
/// Implements `ArrayElement` for an unsigned integer type.
///
/// `$ty` is the primitive type and `$variant` the `DType` value it maps to.
/// Chunks are encoded and decoded as raw native-endian memory copies. The
/// fill element is derived from `UInt`, `Int`, `Float` or `Bool` fill values
/// using `as` casts; anything else, including `None`, yields `0`.
macro_rules! impl_element_uint {
    ($ty:ty, $variant:expr) => {
        impl ArrayElement for $ty {
            const DTYPE: DType = $variant;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy::<Self>(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy::<Self>(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                fill.map_or(0, |value| match value {
                    FillValue::UInt(raw) => *raw as $ty,
                    FillValue::Int(raw) => *raw as $ty,
                    FillValue::Float(raw) => *raw as $ty,
                    // `bool` cannot be cast to every integer width directly
                    // in a generic position, so it goes through `u8` first.
                    FillValue::Bool(flag) => *flag as u8 as $ty,
                    _ => 0,
                })
            }
        }
    };
}
74
/// Implements `ArrayElement` for a signed integer type.
///
/// `$ty` is the primitive type and `$variant` the `DType` value it maps to.
/// Chunks are encoded and decoded as raw native-endian memory copies. The
/// fill element is derived from `Int`, `UInt` or `Float` fill values using
/// `as` casts; anything else, including `None`, yields `0`.
macro_rules! impl_element_int {
    ($ty:ty, $variant:expr) => {
        impl ArrayElement for $ty {
            const DTYPE: DType = $variant;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy::<Self>(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy::<Self>(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                fill.map_or(0, |value| match value {
                    FillValue::Int(raw) => *raw as $ty,
                    FillValue::UInt(raw) => *raw as $ty,
                    FillValue::Float(raw) => *raw as $ty,
                    _ => 0,
                })
            }
        }
    };
}
96
/// Implements `ArrayElement` for a floating-point type.
///
/// `$ty` is the primitive type and `$variant` the `DType` value it maps to.
/// Chunks are encoded and decoded as raw native-endian memory copies. The
/// fill element is derived from `Float`, `Int` or `UInt` fill values using
/// `as` casts; anything else, including `None`, yields `0.0`.
macro_rules! impl_element_float {
    ($ty:ty, $variant:expr) => {
        impl ArrayElement for $ty {
            const DTYPE: DType = $variant;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy::<Self>(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy::<Self>(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                fill.map_or(0.0, |value| match value {
                    FillValue::Float(raw) => *raw as $ty,
                    FillValue::Int(raw) => *raw as $ty,
                    FillValue::UInt(raw) => *raw as $ty,
                    _ => 0.0,
                })
            }
        }
    };
}
118
// Fixed-width numeric element types. Each invocation pairs a Rust primitive
// with its `DType` variant and generates an `ArrayElement` impl that encodes
// chunks as raw memory copies.
impl_element_uint!(u8, DType::UInt8);
impl_element_uint!(u16, DType::UInt16);
impl_element_uint!(u32, DType::UInt32);
impl_element_uint!(u64, DType::UInt64);
impl_element_int!(i8, DType::Int8);
impl_element_int!(i16, DType::Int16);
impl_element_int!(i32, DType::Int32);
impl_element_int!(i64, DType::Int64);
impl_element_float!(f32, DType::Float32);
impl_element_float!(f64, DType::Float64);
129
130impl ArrayElement for TimestampNs {
133 const DTYPE: DType = DType::TimestampNs;
134
135 fn encode_chunk(values: &[Self]) -> Vec<u8> {
136 values.as_bytes().to_vec()
137 }
138
139 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
140 let elem = std::mem::size_of::<Self>();
141 let n = bytes.len() / elem;
142 let mut out = vec![Self(0); n];
143 if n > 0 {
144 out.as_mut_bytes().copy_from_slice(&bytes[..n * elem]);
145 }
146 out
147 }
148
149 fn fill_element(fill: Option<&FillValue>) -> Self {
150 match fill {
151 Some(FillValue::TimestampNs(v)) => Self(*v),
152 Some(FillValue::Int(v)) => Self(*v),
153 _ => Self(0),
154 }
155 }
156}
157
/// Encodes a sequence of byte slices as an offset table followed by the
/// concatenated payload.
///
/// Layout of the returned buffer for `n` input slices:
///
/// * `n + 1` little-endian `u32` offsets, the first always `0`; entry `i + 1`
///   is the end of slice `i` within the payload;
/// * the bytes of all slices, back to back.
///
/// # Panics
///
/// Panics if the combined length of all slices exceeds `u32::MAX`, because
/// such a payload cannot be described by the 32-bit offset table. Truncating
/// the offset instead would silently produce an undecodable buffer.
fn encode_offsets<'a>(slices: impl Iterator<Item = &'a [u8]>) -> Vec<u8> {
    // The table must precede the payload, so the (cheap) slice references are
    // gathered first; the payload bytes themselves are copied only once.
    let slices: Vec<&[u8]> = slices.collect();
    let mut data = Vec::with_capacity((slices.len() + 1) * 4);

    let mut end: u32 = 0;
    data.extend_from_slice(&end.to_le_bytes());
    for s in &slices {
        assert!(
            s.len() <= (u32::MAX - end) as usize,
            "variable-length chunk payload exceeds u32::MAX bytes"
        );
        // Lossless: the assertion above bounds `s.len()` by the remaining
        // `u32` range.
        end += s.len() as u32;
        data.extend_from_slice(&end.to_le_bytes());
    }

    data.reserve(end as usize);
    for s in &slices {
        data.extend_from_slice(s);
    }
    data
}
178
/// Decodes a buffer laid out as a table of little-endian `u32` offsets
/// followed by a payload, returning one owned byte vector per entry.
///
/// The entry count is not stored in the buffer. It is recovered by scanning
/// the offsets from index 1 and stopping at the first index `k` for which
/// `(k + 1) * 4 + offset[k]` equals the buffer length, i.e. the offset that
/// accounts for every remaining byte. If no offset matches, the number of
/// offsets scanned is used as the count.
///
/// Returns an empty vector when the buffer is too short to hold two offsets.
///
/// # Panics
///
/// Panics if the offsets do not describe valid, in-bounds, non-decreasing
/// ranges of the payload.
fn decode_offsets(bytes: &[u8]) -> Vec<Vec<u8>> {
    let total = bytes.len();
    // Reads the `index`-th 32-bit little-endian word of the buffer.
    let offset_at = |index: usize| -> usize {
        let at = index * 4;
        u32::from_le_bytes(bytes[at..at + 4].try_into().unwrap()) as usize
    };

    // Grow the candidate count while another offset still fits in the buffer;
    // stop early once an offset closes the buffer exactly.
    let mut count = 0usize;
    while (count + 2) * 4 <= total {
        count += 1;
        if (count + 1) * 4 + offset_at(count) == total {
            break;
        }
    }
    if count == 0 {
        return Vec::new();
    }

    let base = (count + 1) * 4;
    let mut items = Vec::with_capacity(count);
    for i in 0..count {
        let start = offset_at(i);
        let end = offset_at(i + 1);
        items.push(bytes[base + start..base + end].to_vec());
    }
    items
}
210
211impl ArrayElement for String {
214 const DTYPE: DType = DType::String;
215
216 fn encode_chunk(values: &[Self]) -> Vec<u8> {
217 encode_offsets(values.iter().map(|s| s.as_bytes()))
218 }
219
220 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
221 decode_offsets(bytes)
222 .into_iter()
223 .map(|b| String::from_utf8_lossy(&b).into_owned())
224 .collect()
225 }
226
227 fn fill_element(_fill: Option<&FillValue>) -> Self {
228 String::new()
229 }
230}
231
232impl ArrayElement for Vec<u8> {
235 const DTYPE: DType = DType::Binary;
236
237 fn encode_chunk(values: &[Self]) -> Vec<u8> {
238 encode_offsets(values.iter().map(|v| v.as_slice()))
239 }
240
241 fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
242 decode_offsets(bytes)
243 }
244
245 fn fill_element(_fill: Option<&FillValue>) -> Self {
246 Vec::new()
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// `f32` values survive an encode/decode cycle and take four bytes each.
    #[test]
    fn numeric_roundtrip_f32() {
        let input = vec![1.0f32, 2.5, 3.5];
        let encoded = f32::encode_chunk(&input);
        assert_eq!(encoded.len(), 12);
        let decoded = f32::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Signed values, including negatives and the maximum, round-trip.
    #[test]
    fn numeric_roundtrip_i32() {
        let input = vec![-1i32, 0, 42, i32::MAX];
        let decoded = i32::decode_chunk(&i32::encode_chunk(&input));
        assert_eq!(decoded, input);
    }

    /// Strings, including an empty one, round-trip through the offset layout.
    #[test]
    fn string_roundtrip() {
        let input: Vec<String> = ["hello", "", "world!"]
            .iter()
            .map(|text| text.to_string())
            .collect();
        let encoded = String::encode_chunk(&input);
        let decoded = String::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Byte blobs, including an empty one, round-trip through the offset
    /// layout.
    #[test]
    fn binary_roundtrip() {
        let input: Vec<Vec<u8>> = vec![vec![1u8, 2, 3], vec![], vec![255]];
        let encoded = Vec::<u8>::encode_chunk(&input);
        let decoded = Vec::<u8>::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Numeric fill elements follow the fill value, defaulting to zero.
    #[test]
    fn fill_element_numeric() {
        let int_fill = FillValue::Int(-7);
        assert_eq!(i32::fill_element(Some(&int_fill)), -7i32);

        let float_fill = FillValue::Float(1.5);
        assert_eq!(f64::fill_element(Some(&float_fill)), 1.5f64);

        assert_eq!(u8::fill_element(None), 0u8);
    }

    /// Variable-length types ignore whatever fill value is supplied.
    #[test]
    fn fill_element_vlen_ignores_fill() {
        let ignored = FillValue::Int(99);
        assert_eq!(String::fill_element(Some(&ignored)), "");
        assert_eq!(Vec::<u8>::fill_element(None), Vec::<u8>::new());
    }

    /// Empty input decodes to an empty vector for both layouts.
    #[test]
    fn decode_empty() {
        let no_bytes: &[u8] = &[];
        assert_eq!(i32::decode_chunk(no_bytes), Vec::<i32>::new());
        assert_eq!(String::decode_chunk(no_bytes), Vec::<String>::new());
    }

    /// Timestamps across the full `i64` range round-trip at eight bytes each.
    #[test]
    fn timestamp_roundtrip() {
        let input = vec![
            TimestampNs(0),
            TimestampNs(1_700_000_000_000_000_000),
            TimestampNs(-1),
            TimestampNs(i64::MAX),
            TimestampNs(i64::MIN),
        ];
        let encoded = TimestampNs::encode_chunk(&input);
        assert_eq!(encoded.len(), input.len() * 8);
        let decoded = TimestampNs::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Timestamp fill accepts `TimestampNs` and `Int` values, else zero.
    #[test]
    fn timestamp_fill_element() {
        let from_timestamp = TimestampNs::fill_element(Some(&FillValue::TimestampNs(123)));
        assert_eq!(from_timestamp, TimestampNs(123));

        let from_int = TimestampNs::fill_element(Some(&FillValue::Int(7)));
        assert_eq!(from_int, TimestampNs(7));

        assert_eq!(TimestampNs::fill_element(None), TimestampNs(0));
    }
}