vortex_array/arrays/varbin/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_schema::DataType;
5use vortex_dtype::DType;
6use vortex_error::VortexResult;
7
8use crate::arrays::VarBinVTable;
9use crate::arrays::varbin::VarBinArray;
10use crate::arrow::{FromArrowArray, IntoArrowArray};
11use crate::vtable::CanonicalVTable;
12use crate::{ArrayRef, Canonical, ToCanonical};
13
14impl CanonicalVTable<VarBinVTable> for VarBinVTable {
15    fn canonicalize(array: &VarBinArray) -> VortexResult<Canonical> {
16        let dtype = array.dtype().clone();
17        let nullable = dtype.is_nullable();
18
19        let array_ref = array.to_array().into_arrow_preferred()?;
20        let array = match dtype {
21            DType::Utf8(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::Utf8View)?,
22            DType::Binary(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::BinaryView)?,
23
24            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
25        };
26        Ok(Canonical::VarBinView(
27            ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview()?,
28        ))
29    }
30}
31
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    // Canonicalizing a nullable VarBin array (Utf8 or Binary) must keep the dtype,
    // the null positions, and the bytes of every value, while inlining only the
    // 12-byte value and not the 13-byte one.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        // 12 bytes: expected to be stored inline in the view.
        const INLINED: &str = "123456789012";
        // 13 bytes: expected to be stored out of line.
        const NOT_INLINED: &str = "1234567890123";

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(INLINED.as_bytes());
        builder.append_value(NOT_INLINED.as_bytes());
        let source = builder.finish(dtype.clone());

        let view = source.to_varbinview().unwrap();
        assert_eq!(view.dtype(), &dtype);

        // The two leading entries are nulls.
        for null_idx in 0..2 {
            assert!(!view.is_valid(null_idx).unwrap());
        }

        // Index 2 holds the inlined value.
        assert!(view.views()[2].is_inlined());
        assert_eq!(view.bytes_at(2).as_slice(), INLINED.as_bytes());

        // Index 3 holds the value that does not fit inline.
        assert!(!view.views()[3].is_inlined());
        assert_eq!(view.bytes_at(3).as_slice(), NOT_INLINED.as_bytes());
    }
}