vortex_array/
encoding.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Traits and types to define shared unique encoding identifiers.
5
6use std::any::Any;
7use std::fmt::{Debug, Display, Formatter};
8use std::sync::Arc;
9
10use arcref::ArcRef;
11use vortex_buffer::ByteBuffer;
12use vortex_dtype::DType;
13use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
14
15use crate::serde::ArrayChildren;
16use crate::vtable::{EncodeVTable, SerdeVTable, VTable};
17use crate::{Array, ArrayRef, Canonical, DeserializeMetadata};
18
/// Globally unique name identifying an array's encoding.
pub type EncodingId = ArcRef<str>;

/// [`ArcRef`] handle to a type-erased [`Encoding`].
pub type EncodingRef = ArcRef<dyn Encoding>;
23
/// Object-safe interface to an array encoding.
///
/// The trait is sealed through `private::Sealed`, so it cannot be implemented outside this
/// module; [`EncodingAdapter`] provides the implementation for every [`VTable`].
pub trait Encoding: 'static + private::Sealed + Send + Sync + Debug {
    /// Downcast the encoding to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// Returns an owned [`EncodingRef`] handle for this encoding.
    fn to_encoding(&self) -> EncodingRef;

    /// Returns the ID of the encoding.
    fn id(&self) -> EncodingId;

    /// Build an array from its parts.
    ///
    /// `metadata` holds the serialized, encoding-specific metadata; it is deserialized before
    /// the array is constructed. The built array is checked against the requested `dtype` and
    /// `len`.
    ///
    /// # Errors
    ///
    /// Returns an error if the metadata cannot be deserialized or the encoding fails to build
    /// the array.
    fn build(
        &self,
        dtype: &DType,
        len: usize,
        metadata: &[u8],
        buffers: &[ByteBuffer],
        children: &dyn ArrayChildren,
    ) -> VortexResult<ArrayRef>;

    /// Encode the canonical array into this encoding implementation.
    /// Returns `None` if this encoding does not support the given canonical array, for example
    /// if the data type is incompatible.
    ///
    /// # Errors
    ///
    /// Returns an error if `like` is encoded with a different encoding, or if the encoded
    /// array's length or dtype differs from that of `input`.
    fn encode(&self, input: &Canonical, like: Option<&dyn Array>)
    -> VortexResult<Option<ArrayRef>>;
}
52
/// Adapter struct used to lift the [`VTable`] trait into an object-safe [`Encoding`]
/// implementation.
///
/// Since this is a single-field tuple struct with `repr(transparent)`, we are able to turn
/// un-adapted encoding structs into `dyn` [`Encoding`] using some cheeky casting inside
/// [`std::ops::Deref`] and [`AsRef`]. See the `vtable!` macro for more details.
#[repr(transparent)]
pub struct EncodingAdapter<V: VTable>(V::Encoding);
61
62impl<V: VTable> Encoding for EncodingAdapter<V> {
63    fn as_any(&self) -> &dyn Any {
64        self
65    }
66
67    fn to_encoding(&self) -> EncodingRef {
68        ArcRef::new_arc(Arc::new(EncodingAdapter::<V>(self.0.clone())))
69    }
70
71    fn id(&self) -> EncodingId {
72        V::id(&self.0)
73    }
74
75    fn build(
76        &self,
77        dtype: &DType,
78        len: usize,
79        metadata: &[u8],
80        buffers: &[ByteBuffer],
81        children: &dyn ArrayChildren,
82    ) -> VortexResult<ArrayRef> {
83        let metadata =
84            <<V::SerdeVTable as SerdeVTable<V>>::Metadata as DeserializeMetadata>::deserialize(
85                metadata,
86            )?;
87        let array = <V::SerdeVTable as SerdeVTable<V>>::build(
88            &self.0, dtype, len, &metadata, buffers, children,
89        )?;
90        assert_eq!(array.len(), len, "Array length mismatch after building");
91        assert_eq!(array.dtype(), dtype, "Array dtype mismatch after building");
92        Ok(array.to_array())
93    }
94
95    fn encode(
96        &self,
97        input: &Canonical,
98        like: Option<&dyn Array>,
99    ) -> VortexResult<Option<ArrayRef>> {
100        let downcast_like = like
101            .map(|like| {
102                like.as_opt::<V>().ok_or_else(|| {
103                    vortex_err!(
104                        "Like array {} does not match requested encoding {}",
105                        like.encoding_id(),
106                        self.id()
107                    )
108                })
109            })
110            .transpose()?;
111
112        let Some(array) =
113            <V::EncodeVTable as EncodeVTable<V>>::encode(&self.0, input, downcast_like)?
114        else {
115            return Ok(None);
116        };
117
118        let input = input.as_ref();
119        if array.len() != input.len() {
120            vortex_bail!(
121                "Array length mismatch after encoding: {} != {}",
122                array.len(),
123                input.len()
124            );
125        }
126        if array.dtype() != input.dtype() {
127            vortex_bail!(
128                "Array dtype mismatch after encoding: {} != {}",
129                array.dtype(),
130                input.dtype()
131            );
132        }
133
134        Ok(Some(array.to_array()))
135    }
136}
137
138impl<V: VTable> Debug for EncodingAdapter<V> {
139    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
140        f.debug_struct("Encoding").field("id", &self.id()).finish()
141    }
142}
143
144impl Display for dyn Encoding + '_ {
145    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
146        write!(f, "{}", self.id())
147    }
148}
149
150impl PartialEq for dyn Encoding + '_ {
151    fn eq(&self, other: &Self) -> bool {
152        self.id() == other.id()
153    }
154}
155
156impl Eq for dyn Encoding + '_ {}
157
158impl dyn Encoding + '_ {
159    pub fn as_<V: VTable>(&self) -> &V::Encoding {
160        self.as_any()
161            .downcast_ref::<EncodingAdapter<V>>()
162            .map(|e| &e.0)
163            .vortex_expect("Encoding is not of the expected type")
164    }
165}
166
167mod private {
168    use super::*;
169
170    pub trait Sealed {}
171
172    impl<V: VTable> Sealed for EncodingAdapter<V> {}
173}