vortex_array/vtable/serde.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use vortex_buffer::ByteBuffer;
7use vortex_dtype::DType;
8use vortex_error::{VortexResult, vortex_bail};
9
10use crate::serde::ArrayChildren;
11use crate::vtable::{NotSupported, VTable};
12use crate::{DeserializeMetadata, EmptyMetadata, SerializeMetadata};
13
/// VTable trait for building an array from its serialized components.
///
/// The trait has two halves: [`SerdeVTable::metadata`] exports the metadata to serialize for an
/// array, and [`SerdeVTable::build`] reconstructs an array from that metadata together with its
/// type-erased buffers and children.
///
/// # Guarantees
///
/// Implementations of [`SerdeVTable::build`] must never panic: they either return an error or
/// return a valid array that meets all of the encoding's preconditions. See the documentation
/// on [`SerdeVTable::build`] for the validation this requires.
pub trait SerdeVTable<V: VTable> {
    /// The metadata type of the encoding, which must be serializable and deserializable.
    type Metadata: Debug + SerializeMetadata + DeserializeMetadata;

    /// Exports metadata for an array.
    ///
    /// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
    ///
    /// * If the array does not require serialized metadata, it should return
    ///   [`EmptyMetadata`].
    /// * If the array does not support serialization, it should return `Ok(None)`.
    fn metadata(array: &V::Array) -> VortexResult<Option<Self::Metadata>>;

    /// Build an array from components.
    ///
    /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
    /// type-erased components.
    ///
    /// Encoding implementers should take note that all validation necessary to ensure the encoding
    /// is safe to read should happen inside of this method.
    ///
    /// # Safety and correctness
    ///
    /// This method should *never* panic. It must either return an error, or return a valid
    /// `Array` that meets all the encoding's preconditions.
    ///
    /// For example, the `build` implementation for a dictionary encoding should ensure that all
    /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
    /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
    /// caught here, before returning the deserialized array.
    ///
    /// # Validation
    ///
    /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
    /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
    /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
    /// operations like `get_unchecked` use indices in the array buffer.
    ///
    /// Examples of the kinds of validation that should be part of the `build` step:
    ///
    /// * Checking that any offsets buffers point to valid offsets in some other child array
    /// * Checking that any buffers for data or validity have the appropriate size for the
    ///   encoding
    /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
    ///
    /// # Errors
    ///
    /// Returns an error whenever the supplied components fail validation, or when the encoding
    /// does not support deserialization at all.
    fn build(
        encoding: &V::Encoding,
        dtype: &DType,
        len: usize,
        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
        buffers: &[ByteBuffer],
        children: &dyn ArrayChildren,
    ) -> VortexResult<V::Array>;
}
69
70impl<V: VTable> SerdeVTable<V> for NotSupported {
71    type Metadata = EmptyMetadata;
72
73    fn metadata(_array: &V::Array) -> VortexResult<Option<Self::Metadata>> {
74        Ok(None)
75    }
76
77    fn build(
78        encoding: &V::Encoding,
79        _dtype: &DType,
80        _len: usize,
81        _metadata: &Self::Metadata,
82        _buffers: &[ByteBuffer],
83        _children: &dyn ArrayChildren,
84    ) -> VortexResult<V::Array> {
85        vortex_bail!("Serde not supported by {} encoding", V::id(encoding));
86    }
87}