vortex_zstd/
serde.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::serde::ArrayChildren;
5use vortex_array::validity::Validity;
6use vortex_array::vtable::{EncodeVTable, SerdeVTable, VisitorVTable};
7use vortex_array::{ArrayBufferVisitor, ArrayChildVisitor, ProstMetadata};
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::DType;
10use vortex_error::{VortexResult, vortex_bail};
11
12use crate::{ZstdArray, ZstdEncoding, ZstdVTable};
13
14#[derive(Clone, prost::Message)]
15pub struct ZstdFrameMetadata {
16    #[prost(uint64, tag = "1")]
17    pub uncompressed_size: u64,
18    #[prost(uint64, tag = "2")]
19    pub n_values: u64,
20}
21
22#[derive(Clone, prost::Message)]
23pub struct ZstdMetadata {
24    // optional, will be 0 if there's no dictionary
25    #[prost(uint32, tag = "1")]
26    pub dictionary_size: u32,
27    #[prost(message, repeated, tag = "2")]
28    pub frames: Vec<ZstdFrameMetadata>,
29}
30
31impl SerdeVTable<ZstdVTable> for ZstdVTable {
32    type Metadata = ProstMetadata<ZstdMetadata>;
33
34    fn metadata(array: &ZstdArray) -> VortexResult<Option<Self::Metadata>> {
35        Ok(Some(ProstMetadata(array.metadata.clone())))
36    }
37
38    fn build(
39        _encoding: &ZstdEncoding,
40        dtype: &DType,
41        len: usize,
42        metadata: &ZstdMetadata,
43        buffers: &[ByteBuffer],
44        children: &dyn ArrayChildren,
45    ) -> VortexResult<ZstdArray> {
46        let validity = if children.is_empty() {
47            Validity::from(dtype.nullability())
48        } else if children.len() == 1 {
49            let validity = children.get(0, &Validity::DTYPE, len)?;
50            Validity::Array(validity)
51        } else {
52            vortex_bail!("ZstdArray expected 0 or 1 child, got {}", children.len());
53        };
54
55        let (dictionary_buffer, compressed_buffers) = if metadata.dictionary_size == 0 {
56            // no dictionary
57            (None, buffers.to_vec())
58        } else {
59            // with dictionary
60            (Some(buffers[0].clone()), buffers[1..].to_vec())
61        };
62
63        Ok(ZstdArray::new(
64            dictionary_buffer,
65            compressed_buffers,
66            dtype.clone(),
67            metadata.clone(),
68            len,
69            validity,
70        ))
71    }
72}
73
74impl EncodeVTable<ZstdVTable> for ZstdVTable {
75    fn encode(
76        _encoding: &<ZstdVTable as vortex_array::vtable::VTable>::Encoding,
77        canonical: &vortex_array::Canonical,
78        _like: Option<&ZstdArray>,
79    ) -> VortexResult<Option<ZstdArray>> {
80        ZstdArray::from_canonical(canonical, 3, 0)
81    }
82}
83
84impl VisitorVTable<ZstdVTable> for ZstdVTable {
85    fn visit_buffers(array: &ZstdArray, visitor: &mut dyn ArrayBufferVisitor) {
86        if let Some(buffer) = &array.dictionary {
87            visitor.visit_buffer(buffer);
88        }
89        for buffer in &array.frames {
90            visitor.visit_buffer(buffer);
91        }
92    }
93
94    fn visit_children(array: &ZstdArray, visitor: &mut dyn ArrayChildVisitor) {
95        visitor.visit_validity(&array.unsliced_validity, array.unsliced_n_rows());
96    }
97}