vortex_array/arrays/varbin/
canonical.rs

1use arrow_schema::DataType;
2use vortex_dtype::DType;
3use vortex_error::VortexResult;
4
5use crate::arrays::VarBinViewArray;
6use crate::arrays::varbin::VarBinArray;
7use crate::arrow::{FromArrowArray, IntoArrowArray};
8use crate::{Array, ArrayCanonicalImpl, ArrayRef, Canonical, TryFromArrayRef};
9
10impl ArrayCanonicalImpl for VarBinArray {
11    fn _to_canonical(&self) -> VortexResult<Canonical> {
12        let dtype = self.dtype().clone();
13        let nullable = dtype.is_nullable();
14
15        let array_ref = self.to_array().into_arrow_preferred()?;
16        let array = match dtype {
17            DType::Utf8(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::Utf8View)?,
18            DType::Binary(_) => arrow_cast::cast(array_ref.as_ref(), &DataType::BinaryView)?,
19
20            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
21        };
22        VarBinViewArray::try_from_array(ArrayRef::from_arrow(array, nullable))
23            .map(Canonical::VarBinView)
24    }
25}
26
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::array::Array;
    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    /// Builds a nullable `VarBinArray` holding two nulls, a 12-byte value and a
    /// 13-byte value, converts it with `to_varbinview`, and checks the dtype,
    /// validity, inlining and contents of the result.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        // 12 bytes: asserted below to be stored inline in its view.
        let short = "123456789012".as_bytes();
        // 13 bytes: asserted below to NOT be stored inline.
        let long = "1234567890123".as_bytes();

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(short);
        builder.append_value(long);
        let source = builder.finish(dtype.clone());

        let view_array = source.to_varbinview().unwrap();
        assert_eq!(view_array.dtype(), &dtype);

        // The two leading entries were appended as nulls.
        for idx in [0, 1] {
            assert!(!view_array.is_valid(idx).unwrap());
        }

        // Index 2 holds the 12-byte value.
        assert!(view_array.views()[2].is_inlined());
        assert_eq!(view_array.bytes_at(2).as_slice(), short);

        // Index 3 holds the 13-byte value.
        assert!(!view_array.views()[3].is_inlined());
        assert_eq!(view_array.bytes_at(3).as_slice(), long);
    }
}