Skip to main content

array_format/
array.rs

1use zerocopy::IntoBytes;
2
3use crate::dtype::DType;
4use crate::layout::FillValue;
5use crate::timestamp::TimestampNs;
6
7// ── ArrayElement trait ──────────────────────────────────────────────
8
/// Unified element type for all array operations.
///
/// Implemented by all fixed-width numeric primitives and variable-length
/// types (`String`, `Vec<u8>`). The trait provides chunk-level encode/decode
/// and fill-value generation so that a single generic code path handles both.
///
/// Every item is an associated constant or associated function (there is no
/// `self` receiver), so callers pick the implementation through the element
/// type, e.g. `f32::encode_chunk(&values)`.
pub trait ArrayElement: Clone + Send + Sync + 'static {
    /// The [`DType`] this Rust type maps to on disk.
    const DTYPE: DType;
    /// Encodes a chunk's worth of values into the on-disk byte representation.
    ///
    /// Returns a newly allocated buffer; `values` is only borrowed.
    fn encode_chunk(values: &[Self]) -> Vec<u8>;
    /// Decodes bytes produced by [`encode_chunk`](Self::encode_chunk) back into
    /// values.
    ///
    /// The signature has no error channel, so how an implementation reacts to
    /// bytes that did not come from `encode_chunk` is implementation-defined.
    fn decode_chunk(bytes: &[u8]) -> Vec<Self>;
    /// Returns the element used for unwritten positions, derived from the
    /// array's `fill` value (or this type's natural default when `fill` is
    /// `None` or not applicable).
    fn fill_element(fill: Option<&FillValue>) -> Self;
}
27
28// ── Shared helpers for fixed-width types ─────────────────────────────
29
/// Returns a byte-for-byte copy of the memory backing `values`.
///
/// The result is `values.len() * size_of::<T>()` bytes long and holds the
/// elements in their in-memory representation; no byte-order conversion is
/// applied.
fn encode_copy<T: Sized>(values: &[T]) -> Vec<u8> {
    let first_byte = values.as_ptr().cast::<u8>();
    let total_bytes = values.len() * std::mem::size_of::<T>();
    // SAFETY: `first_byte` points at the start of a live, borrowed slice that
    // spans exactly `total_bytes` bytes, and `u8` has no alignment requirement.
    // The numeric impls in this file only instantiate `T` with primitive
    // integer/float types, which have no padding, so every byte is initialized.
    let raw = unsafe { std::slice::from_raw_parts(first_byte, total_bytes) };
    raw.to_vec()
}
34
/// Reinterprets `bytes` as a packed sequence of `T` values and returns them.
///
/// Only whole elements are decoded: `bytes.len() / size_of::<T>()` values are
/// produced and any trailing partial element is ignored. Empty input, or a
/// zero-sized `T`, yields an empty vector.
fn decode_copy<T: Sized>(bytes: &[u8]) -> Vec<T> {
    let width = std::mem::size_of::<T>();
    if width == 0 || bytes.is_empty() {
        return Vec::new();
    }
    bytes
        .chunks_exact(width)
        .map(|chunk| {
            // SAFETY: `chunk` is exactly `size_of::<T>()` readable bytes and
            // `read_unaligned` imposes no alignment requirement on the source.
            // Numeric primitives have no invalid bit patterns, so any byte
            // content forms a valid `T`.
            unsafe { std::ptr::read_unaligned(chunk.as_ptr().cast::<T>()) }
        })
        .collect()
}
49
50// ── Numeric implementations ──────────────────────────────────────────
51
/// Implements [`ArrayElement`] for an unsigned integer primitive.
///
/// `$prim` is the Rust type and `$dtype` the [`DType`] value it is tagged
/// with. Chunks are encoded and decoded as raw copies of the element bytes.
macro_rules! impl_element_uint {
    ($prim:ty, $dtype:expr) => {
        impl ArrayElement for $prim {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                // Numeric and boolean fills are converted with `as` casts;
                // a missing fill or any other variant falls back to zero.
                fill.map_or(0, |value| match value {
                    FillValue::Bool(flag) => *flag as u8 as $prim,
                    FillValue::Float(x) => *x as $prim,
                    FillValue::Int(i) => *i as $prim,
                    FillValue::UInt(u) => *u as $prim,
                    _ => 0,
                })
            }
        }
    };
}
74
/// Implements [`ArrayElement`] for a signed integer primitive.
///
/// `$prim` is the Rust type and `$dtype` the [`DType`] value it is tagged
/// with. Chunks are encoded and decoded as raw copies of the element bytes.
macro_rules! impl_element_int {
    ($prim:ty, $dtype:expr) => {
        impl ArrayElement for $prim {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                // Numeric fills are converted with `as` casts; a missing fill
                // or any non-numeric variant falls back to zero.
                fill.map_or(0, |value| match value {
                    FillValue::Float(x) => *x as $prim,
                    FillValue::UInt(u) => *u as $prim,
                    FillValue::Int(i) => *i as $prim,
                    _ => 0,
                })
            }
        }
    };
}
96
/// Implements [`ArrayElement`] for a floating-point primitive.
///
/// `$prim` is the Rust type and `$dtype` the [`DType`] value it is tagged
/// with. Chunks are encoded and decoded as raw copies of the element bytes.
macro_rules! impl_element_float {
    ($prim:ty, $dtype:expr) => {
        impl ArrayElement for $prim {
            const DTYPE: DType = $dtype;

            fn encode_chunk(values: &[Self]) -> Vec<u8> {
                encode_copy(values)
            }

            fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
                decode_copy(bytes)
            }

            fn fill_element(fill: Option<&FillValue>) -> Self {
                // Numeric fills are converted with `as` casts; a missing fill
                // or any non-numeric variant falls back to `0.0`.
                fill.map_or(0.0, |value| match value {
                    FillValue::UInt(u) => *u as $prim,
                    FillValue::Int(i) => *i as $prim,
                    FillValue::Float(x) => *x as $prim,
                    _ => 0.0,
                })
            }
        }
    };
}
118
// Instantiate `ArrayElement` for every fixed-width numeric primitive, pairing
// each Rust type with its `DType` tag. Unsigned, signed and floating-point
// types go through separate macros because their `fill_element` conversions
// differ.
impl_element_uint!(u8, DType::UInt8);
impl_element_uint!(u16, DType::UInt16);
impl_element_uint!(u32, DType::UInt32);
impl_element_uint!(u64, DType::UInt64);
impl_element_int!(i8, DType::Int8);
impl_element_int!(i16, DType::Int16);
impl_element_int!(i32, DType::Int32);
impl_element_int!(i64, DType::Int64);
impl_element_float!(f32, DType::Float32);
impl_element_float!(f64, DType::Float64);
129
130// ── TimestampNs (wrapper around i64, zerocopy-backed) ────────────────
131
132impl ArrayElement for TimestampNs {
133    const DTYPE: DType = DType::TimestampNs;
134
135    fn encode_chunk(values: &[Self]) -> Vec<u8> {
136        values.as_bytes().to_vec()
137    }
138
139    fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
140        let elem = std::mem::size_of::<Self>();
141        let n = bytes.len() / elem;
142        let mut out = vec![Self(0); n];
143        if n > 0 {
144            out.as_mut_bytes().copy_from_slice(&bytes[..n * elem]);
145        }
146        out
147    }
148
149    fn fill_element(fill: Option<&FillValue>) -> Self {
150        match fill {
151            Some(FillValue::TimestampNs(v)) => Self(*v),
152            Some(FillValue::Int(v)) => Self(*v),
153            _ => Self(0),
154        }
155    }
156}
157
158// ── Variable-length helpers ──────────────────────────────────────────
159
/// Encodes a sequence of byte slices into the offset-buffer format:
/// `[N+1 u32 LE offsets][concatenated values]`.
///
/// The first offset is always `0`; offset `i + 1` is the end position of value
/// `i` within the concatenated payload, so value `i` occupies
/// `offsets[i]..offsets[i + 1]`. An empty iterator produces just the single
/// zero offset (four bytes).
///
/// # Panics
///
/// Panics if the concatenated payload grows beyond `u32::MAX` bytes, because
/// such a position cannot be represented as a `u32` offset. (Casting with
/// `as u32` would instead wrap silently and write a corrupt offset table.)
fn encode_offsets<'a>(slices: impl Iterator<Item = &'a [u8]>) -> Vec<u8> {
    const OFFSET_WIDTH: usize = std::mem::size_of::<u32>();

    // The offset table is written straight into the output buffer; only the
    // payload needs a side buffer because it must follow the complete table.
    let (expected_values, _) = slices.size_hint();
    let mut encoded: Vec<u8> =
        Vec::with_capacity(expected_values.saturating_add(1).saturating_mul(OFFSET_WIDTH));
    let mut payload: Vec<u8> = Vec::new();

    encoded.extend_from_slice(&0u32.to_le_bytes());
    for value in slices {
        payload.extend_from_slice(value);
        let end = u32::try_from(payload.len())
            .expect("variable-length chunk payload exceeds the u32 offset range");
        encoded.extend_from_slice(&end.to_le_bytes());
    }
    encoded.extend_from_slice(&payload);
    encoded
}
178
/// Decodes an offset-buffer chunk back to a `Vec<Vec<u8>>`.
///
/// The chunk does not store its value count, so the count is recovered by
/// scanning the little-endian `u32` offsets from index 1 onwards until one is
/// found whose value, added to the size of the offset table ending at that
/// entry, equals the total chunk length. Input too short to hold two offsets
/// decodes to an empty vector.
///
/// # Panics
///
/// Offsets are trusted: a chunk whose offsets are decreasing or point past
/// the end of the buffer panics when the payload is sliced.
fn decode_offsets(bytes: &[u8]) -> Vec<Vec<u8>> {
    const OFFSET_WIDTH: usize = 4;

    // Reads the `index`-th little-endian u32 of the offset table.
    let offset_at = |index: usize| -> usize {
        let at = index * OFFSET_WIDTH;
        let mut raw = [0u8; OFFSET_WIDTH];
        raw.copy_from_slice(&bytes[at..at + OFFSET_WIDTH]);
        u32::from_le_bytes(raw) as usize
    };

    // Walk the candidate "last offset" positions. Each step assumes one more
    // value exists and stops as soon as table size + final offset accounts
    // for the whole buffer, or when no further offset fits.
    let mut count = 0usize;
    while (count + 2) * OFFSET_WIDTH <= bytes.len() {
        count += 1;
        if (count + 1) * OFFSET_WIDTH + offset_at(count) == bytes.len() {
            break;
        }
    }
    if count == 0 {
        return Vec::new();
    }

    let payload = &bytes[(count + 1) * OFFSET_WIDTH..];
    let mut values = Vec::with_capacity(count);
    let mut start = offset_at(0);
    for index in 1..=count {
        // Each value ends where the next one begins.
        let end = offset_at(index);
        values.push(payload[start..end].to_vec());
        start = end;
    }
    values
}
210
211// ── String ───────────────────────────────────────────────────────────
212
213impl ArrayElement for String {
214    const DTYPE: DType = DType::String;
215
216    fn encode_chunk(values: &[Self]) -> Vec<u8> {
217        encode_offsets(values.iter().map(|s| s.as_bytes()))
218    }
219
220    fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
221        decode_offsets(bytes)
222            .into_iter()
223            .map(|b| String::from_utf8_lossy(&b).into_owned())
224            .collect()
225    }
226
227    fn fill_element(_fill: Option<&FillValue>) -> Self {
228        String::new()
229    }
230}
231
232// ── Vec<u8> ──────────────────────────────────────────────────────────
233
234impl ArrayElement for Vec<u8> {
235    const DTYPE: DType = DType::Binary;
236
237    fn encode_chunk(values: &[Self]) -> Vec<u8> {
238        encode_offsets(values.iter().map(|v| v.as_slice()))
239    }
240
241    fn decode_chunk(bytes: &[u8]) -> Vec<Self> {
242        decode_offsets(bytes)
243    }
244
245    fn fill_element(_fill: Option<&FillValue>) -> Self {
246        Vec::new()
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// `f32` chunks use four bytes per element and decode to the input.
    #[test]
    fn numeric_roundtrip_f32() {
        let input = vec![1.0f32, 2.5, 3.5];
        let encoded = f32::encode_chunk(&input);
        assert_eq!(encoded.len(), 12);
        assert_eq!(f32::decode_chunk(&encoded), input);
    }

    /// Signed values, including negatives and `i32::MAX`, survive a round trip.
    #[test]
    fn numeric_roundtrip_i32() {
        let input = vec![-1i32, 0, 42, i32::MAX];
        let decoded = i32::decode_chunk(&i32::encode_chunk(&input));
        assert_eq!(decoded, input);
    }

    /// Strings, including an empty one in the middle, survive a round trip.
    #[test]
    fn string_roundtrip() {
        let input: Vec<String> = ["hello", "", "world!"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let encoded = String::encode_chunk(&input);
        let decoded = String::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Binary blobs, including an empty one in the middle, survive a round trip.
    #[test]
    fn binary_roundtrip() {
        let input = vec![vec![1u8, 2, 3], vec![], vec![255]];
        let encoded = Vec::<u8>::encode_chunk(&input);
        let decoded = Vec::<u8>::decode_chunk(&encoded);
        assert_eq!(decoded, input);
    }

    /// Numeric fills map to the matching element value; no fill means zero.
    #[test]
    fn fill_element_numeric() {
        let int_fill = FillValue::Int(-7);
        assert_eq!(i32::fill_element(Some(&int_fill)), -7i32);

        let float_fill = FillValue::Float(1.5);
        assert_eq!(f64::fill_element(Some(&float_fill)), 1.5f64);

        assert_eq!(u8::fill_element(None), 0u8);
    }

    /// Variable-length types always fill with their empty value.
    #[test]
    fn fill_element_vlen_ignores_fill() {
        let ignored = FillValue::Int(99);
        assert_eq!(String::fill_element(Some(&ignored)), "");
        assert_eq!(Vec::<u8>::fill_element(None), Vec::<u8>::new());
    }

    /// Empty input decodes to an empty vector for fixed- and variable-width types.
    #[test]
    fn decode_empty() {
        let no_bytes: &[u8] = &[];
        assert_eq!(i32::decode_chunk(no_bytes), Vec::<i32>::new());
        assert_eq!(String::decode_chunk(no_bytes), Vec::<String>::new());
    }

    /// Timestamps across the full `i64` range use eight bytes each and round-trip.
    #[test]
    fn timestamp_roundtrip() {
        let input = vec![
            TimestampNs(0),
            TimestampNs(1_700_000_000_000_000_000),
            TimestampNs(-1),
            TimestampNs(i64::MAX),
            TimestampNs(i64::MIN),
        ];
        let encoded = TimestampNs::encode_chunk(&input);
        assert_eq!(encoded.len(), input.len() * 8);
        assert_eq!(TimestampNs::decode_chunk(&encoded), input);
    }

    /// Timestamp and integer fills are used verbatim; no fill means zero.
    #[test]
    fn timestamp_fill_element() {
        let from_timestamp = TimestampNs::fill_element(Some(&FillValue::TimestampNs(123)));
        assert_eq!(from_timestamp, TimestampNs(123));

        let from_int = TimestampNs::fill_element(Some(&FillValue::Int(7)));
        assert_eq!(from_int, TimestampNs(7));

        assert_eq!(TimestampNs::fill_element(None), TimestampNs(0));
    }
}