vortex_array/arrays/varbin/vtable/
canonical.rs1use std::sync::Arc;
5
6use arrow_array::BinaryViewArray;
7use arrow_array::StringViewArray;
8use arrow_array::cast::AsArray;
9use arrow_schema::DataType;
10use vortex_dtype::DType;
11use vortex_error::VortexExpect;
12
13use crate::ArrayRef;
14use crate::Canonical;
15use crate::ToCanonical;
16use crate::arrays::VarBinVTable;
17use crate::arrays::varbin::VarBinArray;
18use crate::arrow::FromArrowArray;
19use crate::arrow::IntoArrowArray;
20use crate::vtable::CanonicalVTable;
21
22impl CanonicalVTable<VarBinVTable> for VarBinVTable {
23 fn canonicalize(array: &VarBinArray) -> Canonical {
24 let dtype = array.dtype().clone();
25 let nullable = dtype.is_nullable();
26
27 let array_ref = array
28 .to_array()
29 .into_arrow_preferred()
30 .vortex_expect("VarBinArray must be convertible to arrow array");
31
32 let array = match (&dtype, array_ref.data_type()) {
33 (DType::Utf8(_), DataType::Utf8) => {
34 Arc::new(StringViewArray::from(array_ref.as_string::<i32>()))
35 as Arc<dyn arrow_array::Array>
36 }
37 (DType::Utf8(_), DataType::LargeUtf8) => {
38 Arc::new(StringViewArray::from(array_ref.as_string::<i64>()))
39 as Arc<dyn arrow_array::Array>
40 }
41
42 (DType::Binary(_), DataType::Binary) => {
43 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i32>()))
44 }
45 (DType::Binary(_), DataType::LargeBinary) => {
46 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i64>()))
47 }
48 (DType::Binary(_), DataType::BinaryView) | (DType::Utf8(_), DataType::Utf8View) => {
50 array_ref
51 }
52 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype, instead got: {dtype}",),
53 };
54 Canonical::VarBinView(ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview())
55 }
56}
57
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    // Builds a nullable VarBin array of two nulls, a 12-byte value and a 13-byte value,
    // then checks that converting it to a VarBinView keeps the dtype, the null slots and
    // the stored bytes. The 12-byte value is asserted to be inlined in its view and the
    // 13-byte value is asserted not to be.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        let twelve_bytes = "123456789012".as_bytes();
        let thirteen_bytes = "1234567890123".as_bytes();

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(twelve_bytes);
        builder.append_value(thirteen_bytes);
        let source = builder.finish(dtype.clone());

        let view_array = source.to_varbinview();
        assert_eq!(view_array.dtype(), &dtype);

        // The two leading entries were appended as nulls.
        assert!(!view_array.is_valid(0));
        assert!(!view_array.is_valid(1));

        // Index 2 holds the 12-byte value, expected to be stored inline in the view.
        assert!(view_array.views()[2].is_inlined());
        assert_eq!(view_array.bytes_at(2).as_slice(), twelve_bytes);

        // Index 3 holds the 13-byte value, expected not to be stored inline.
        assert!(!view_array.views()[3].is_inlined());
        assert_eq!(view_array.bytes_at(3).as_slice(), thirteen_bytes);
    }
}