// vortex_array/vtable/serde.rs
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use vortex_buffer::ByteBuffer;
7use vortex_dtype::DType;
8use vortex_error::{VortexResult, vortex_bail};
9
10use crate::serde::ArrayChildren;
11use crate::vtable::{NotSupported, VTable};
12use crate::{DeserializeMetadata, EmptyMetadata, SerializeMetadata};
13
14/// VTable trait for building an array from its serialized components.
15///
16/// # Guarantees
17pub trait SerdeVTable<V: VTable> {
18 type Metadata: Debug + SerializeMetadata + DeserializeMetadata;
19
20 /// Exports metadata for an array.
21 ///
22 /// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
23 ///
24 /// * If the array does not require serialized metadata, it should return
25 /// [`crate::metadata::EmptyMetadata`].
26 /// * If the array does not support serialization, it should return `None`.
27 fn metadata(array: &V::Array) -> VortexResult<Option<Self::Metadata>>;
28
29 /// Build an array from components.
30 ///
31 /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
32 /// type-erased components.
33 ///
34 /// Encoding implementers should take note that all validation necessary to ensure the encoding
35 /// is safe to read should happen inside of this method.
36 ///
37 /// # Safety and correctness
38 ///
39 /// This method should *never* panic, it must always return an error or else it returns a
40 /// valid `Array` that meets all the encoding's preconditions.
41 ///
42 /// For example, the `build` implementation for a dictionary encoding should ensure that all
43 /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
44 /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
45 /// caught here, before returning the deserialized array.
46 ///
47 /// # Validation
48 ///
49 /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
50 /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
51 /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
52 /// operations like `get_unchecked` use indices in the array buffer.
53 ///
54 /// Examples of the kinds of validation that should be part of the `build` step:
55 ///
56 /// * Checking that any offsets buffers point to valid offsets in some other child array
57 /// * Checking that any buffers for data or validity have the appropriate size for the
58 /// encoding
59 /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
60 fn build(
61 encoding: &V::Encoding,
62 dtype: &DType,
63 len: usize,
64 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
65 buffers: &[ByteBuffer],
66 children: &dyn ArrayChildren,
67 ) -> VortexResult<V::Array>;
68}
69
70impl<V: VTable> SerdeVTable<V> for NotSupported {
71 type Metadata = EmptyMetadata;
72
73 fn metadata(_array: &V::Array) -> VortexResult<Option<Self::Metadata>> {
74 Ok(None)
75 }
76
77 fn build(
78 encoding: &V::Encoding,
79 _dtype: &DType,
80 _len: usize,
81 _metadata: &Self::Metadata,
82 _buffers: &[ByteBuffer],
83 _children: &dyn ArrayChildren,
84 ) -> VortexResult<V::Array> {
85 vortex_bail!("Serde not supported by {} encoding", V::id(encoding));
86 }
87}