vortex_array/
encoding.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Traits and types to define shared unique encoding identifiers.
5
6use std::any::Any;
7use std::fmt::{Debug, Display, Formatter};
8use std::sync::Arc;
9
10use arcref::ArcRef;
11use vortex_buffer::ByteBuffer;
12use vortex_dtype::DType;
13use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
14
15use crate::serde::ArrayChildren;
16use crate::vtable::{EncodeVTable, SerdeVTable, VTable};
17use crate::{Array, ArrayRef, Canonical, DeserializeMetadata};
18
/// Globally unique name identifying an array's encoding.
///
/// Represented as an [`ArcRef`] over `str`.
pub type EncodingId = ArcRef<str>;
21
/// Handle to a type-erased [`Encoding`], as returned by [`Encoding::to_encoding`] and
/// [`Encoding::into_encoding`].
pub type EncodingRef = ArcRef<dyn Encoding>;
23
/// Marker trait for array encodings with their associated Array type.
///
/// The trait is sealed: it requires `private::Sealed`, which lives in a private module, so
/// implementations can only be provided from within this module. [`EncodingAdapter`] is the
/// implementation provided here.
pub trait Encoding: 'static + private::Sealed + Send + Sync + Debug {
    /// Downcast the encoding to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// Returns an [`EncodingRef`] for this encoding, leaving `self` usable afterwards.
    fn to_encoding(&self) -> EncodingRef;

    /// Converts this encoding into an [`EncodingRef`], consuming `self`.
    fn into_encoding(self) -> EncodingRef
    where
        Self: Sized;

    /// Returns the ID of the encoding.
    fn id(&self) -> EncodingId;

    /// Build an array from its parts.
    ///
    /// `metadata` is passed as raw bytes; `dtype` and `len` are the data type and length the
    /// resulting array is expected to have.
    fn build(
        &self,
        dtype: &DType,
        len: usize,
        metadata: &[u8],
        buffers: &[ByteBuffer],
        children: &dyn ArrayChildren,
    ) -> VortexResult<ArrayRef>;

    /// Encode the canonical array into this encoding implementation.
    /// Returns `None` if this encoding does not support the given canonical array, for example
    /// if the data type is incompatible.
    ///
    /// Returns an error if `like` is encoded with a different encoding.
    fn encode(&self, input: &Canonical, like: Option<&dyn Array>)
    -> VortexResult<Option<ArrayRef>>;
}
56
/// Adapter struct used to lift the [`VTable`] trait into an object-safe [`Encoding`]
/// implementation.
///
/// Since this is a single-field tuple struct with `repr(transparent)`, we are able to turn
/// un-adapted array structs into [`dyn Encoding`] using some cheeky casting inside
/// [`std::ops::Deref`] and [`AsRef`]. See the `vtable!` macro for more details.
#[repr(transparent)]
pub struct EncodingAdapter<V: VTable>(V::Encoding);
65
66impl<V: VTable> Encoding for EncodingAdapter<V> {
67    fn as_any(&self) -> &dyn Any {
68        self
69    }
70
71    fn to_encoding(&self) -> EncodingRef {
72        ArcRef::new_arc(Arc::new(EncodingAdapter::<V>(self.0.clone())))
73    }
74
75    fn into_encoding(self) -> EncodingRef
76    where
77        Self: Sized,
78    {
79        todo!()
80    }
81
82    fn id(&self) -> EncodingId {
83        V::id(&self.0)
84    }
85
86    fn build(
87        &self,
88        dtype: &DType,
89        len: usize,
90        metadata: &[u8],
91        buffers: &[ByteBuffer],
92        children: &dyn ArrayChildren,
93    ) -> VortexResult<ArrayRef> {
94        let metadata =
95            <<V::SerdeVTable as SerdeVTable<V>>::Metadata as DeserializeMetadata>::deserialize(
96                metadata,
97            )?;
98        let array = <V::SerdeVTable as SerdeVTable<V>>::build(
99            &self.0, dtype, len, &metadata, buffers, children,
100        )?;
101        assert_eq!(array.len(), len, "Array length mismatch after building");
102        assert_eq!(array.dtype(), dtype, "Array dtype mismatch after building");
103        Ok(array.to_array())
104    }
105
106    fn encode(
107        &self,
108        input: &Canonical,
109        like: Option<&dyn Array>,
110    ) -> VortexResult<Option<ArrayRef>> {
111        let downcast_like = like
112            .map(|like| {
113                like.as_opt::<V>().ok_or_else(|| {
114                    vortex_err!(
115                        "Like array {} does not match requested encoding {}",
116                        like.encoding_id(),
117                        self.id()
118                    )
119                })
120            })
121            .transpose()?;
122
123        let Some(array) =
124            <V::EncodeVTable as EncodeVTable<V>>::encode(&self.0, input, downcast_like)?
125        else {
126            return Ok(None);
127        };
128
129        let input = input.as_ref();
130        if array.len() != input.len() {
131            vortex_bail!(
132                "Array length mismatch after encoding: {} != {}",
133                array.len(),
134                input.len()
135            );
136        }
137        if array.dtype() != input.dtype() {
138            vortex_bail!(
139                "Array dtype mismatch after encoding: {} != {}",
140                array.dtype(),
141                input.dtype()
142            );
143        }
144
145        Ok(Some(array.to_array()))
146    }
147}
148
149impl<V: VTable> Debug for EncodingAdapter<V> {
150    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
151        f.debug_struct("Encoding").field("id", &self.id()).finish()
152    }
153}
154
155impl Display for dyn Encoding + '_ {
156    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
157        write!(f, "{}", self.id())
158    }
159}
160
161impl PartialEq for dyn Encoding + '_ {
162    fn eq(&self, other: &Self) -> bool {
163        self.id() == other.id()
164    }
165}
166
167impl Eq for dyn Encoding + '_ {}
168
169impl dyn Encoding + '_ {
170    pub fn as_<V: VTable>(&self) -> &V::Encoding {
171        self.as_any()
172            .downcast_ref::<EncodingAdapter<V>>()
173            .map(|e| &e.0)
174            .vortex_expect("Encoding is not of the expected type")
175    }
176}
177
178mod private {
179    use super::*;
180
181    pub trait Sealed {}
182
183    impl<V: VTable> Sealed for EncodingAdapter<V> {}
184}