vortex_array/arrays/varbin/
canonical.rs

1use arrow_schema::DataType;
2use vortex_dtype::DType;
3use vortex_error::VortexResult;
4
5use crate::arrays::VarBinVTable;
6use crate::arrays::varbin::VarBinArray;
7use crate::arrow::{FromArrowArray, IntoArrowArray};
8use crate::vtable::CanonicalVTable;
9use crate::{ArrayRef, Canonical, ToCanonical};
10
11impl CanonicalVTable<VarBinVTable> for VarBinVTable {
12    fn canonicalize(array: &VarBinArray) -> VortexResult<Canonical> {
13        let dtype = array.dtype().clone();
14        let nullable = dtype.is_nullable();
15
16        let array_ref = array.to_array().into_arrow_preferred()?;
17        let array = match dtype {
18            DType::Utf8(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::Utf8View)?,
19            DType::Binary(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::BinaryView)?,
20
21            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
22        };
23        Ok(Canonical::VarBinView(
24            ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview()?,
25        ))
26    }
27}
28
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    /// Builds a nullable VarBin array holding two nulls, a 12-byte value and a
    /// 13-byte value, canonicalizes it, and checks dtype, validity, inlining
    /// and contents of the resulting view array.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        // 12 bytes: expected to be stored inline in the view.
        const INLINED: &str = "123456789012";
        // 13 bytes: expected to be stored out of line.
        const NOT_INLINED: &str = "1234567890123";

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(INLINED.as_bytes());
        builder.append_value(NOT_INLINED.as_bytes());
        let input = builder.finish(dtype.clone());

        let view = input.to_varbinview().unwrap();
        assert_eq!(view.dtype(), &dtype);

        // The two leading entries were appended as nulls.
        for row in 0..2 {
            assert!(!view.is_valid(row).unwrap());
        }

        // Row 2 holds the 12-byte value and must be inlined.
        assert!(view.views()[2].is_inlined());
        assert_eq!(view.bytes_at(2).as_slice(), INLINED.as_bytes());

        // Row 3 holds the 13-byte value and must not be inlined.
        assert!(!view.views()[3].is_inlined());
        assert_eq!(view.bytes_at(3).as_slice(), NOT_INLINED.as_bytes());
    }
}