vortex_array/arrays/varbin/
canonical.rs1use std::sync::Arc;
5
6use arrow_array::cast::AsArray;
7use arrow_array::{BinaryViewArray, StringViewArray};
8use arrow_schema::DataType;
9use vortex_dtype::DType;
10use vortex_error::VortexResult;
11
12use crate::arrays::VarBinVTable;
13use crate::arrays::varbin::VarBinArray;
14use crate::arrow::{FromArrowArray, IntoArrowArray};
15use crate::vtable::CanonicalVTable;
16use crate::{ArrayRef, Canonical, ToCanonical};
17
18impl CanonicalVTable<VarBinVTable> for VarBinVTable {
19 fn canonicalize(array: &VarBinArray) -> VortexResult<Canonical> {
20 let dtype = array.dtype().clone();
21 let nullable = dtype.is_nullable();
22
23 let array_ref = array.to_array().into_arrow_preferred()?;
24
25 let array = match (&dtype, array_ref.data_type()) {
26 (DType::Utf8(_), DataType::Utf8) => {
27 Arc::new(StringViewArray::from(array_ref.as_string::<i32>()))
28 as Arc<dyn arrow_array::Array>
29 }
30 (DType::Utf8(_), DataType::LargeUtf8) => {
31 Arc::new(StringViewArray::from(array_ref.as_string::<i64>()))
32 as Arc<dyn arrow_array::Array>
33 }
34
35 (DType::Binary(_), DataType::Binary) => {
36 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i32>()))
37 }
38 (DType::Binary(_), DataType::LargeBinary) => {
39 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i64>()))
40 }
41 (DType::Binary(_), DataType::BinaryView) | (DType::Utf8(_), DataType::Utf8View) => {
43 array_ref
44 }
45 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype, instead got: {dtype}",),
46 };
47 Ok(Canonical::VarBinView(
48 ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview()?,
49 ))
50 }
51}
52
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    /// Builds a nullable VarBin array holding two nulls, a 12-byte value and a
    /// 13-byte value, converts it with `to_varbinview`, and checks dtype,
    /// validity, inlining and contents for both the Utf8 and Binary dtypes.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        // 12 bytes: the test expects this value to be stored inline in its view.
        let inlined_value = "123456789012".as_bytes();
        // 13 bytes: the test expects this value NOT to be stored inline.
        let referenced_value = "1234567890123".as_bytes();

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(inlined_value);
        builder.append_value(referenced_value);
        let source = builder.finish(dtype.clone());

        let view_array = source.to_varbinview().unwrap();
        assert_eq!(view_array.dtype(), &dtype);

        // The two leading entries were appended as nulls.
        for null_idx in 0..2 {
            assert!(!view_array.is_valid(null_idx).unwrap());
        }

        let views = view_array.views();

        assert!(views[2].is_inlined());
        assert_eq!(view_array.bytes_at(2).as_slice(), inlined_value);

        assert!(!views[3].is_inlined());
        assert_eq!(view_array.bytes_at(3).as_slice(), referenced_value);
    }
}