vortex_array/arrays/varbin/
canonical.rs1use std::sync::Arc;
5
6use arrow_array::cast::AsArray;
7use arrow_array::{BinaryViewArray, StringViewArray};
8use arrow_schema::DataType;
9use vortex_dtype::DType;
10use vortex_error::VortexExpect;
11
12use crate::arrays::VarBinVTable;
13use crate::arrays::varbin::VarBinArray;
14use crate::arrow::{FromArrowArray, IntoArrowArray};
15use crate::vtable::CanonicalVTable;
16use crate::{ArrayRef, Canonical, ToCanonical};
17
18impl CanonicalVTable<VarBinVTable> for VarBinVTable {
19 fn canonicalize(array: &VarBinArray) -> Canonical {
20 let dtype = array.dtype().clone();
21 let nullable = dtype.is_nullable();
22
23 let array_ref = array
24 .to_array()
25 .into_arrow_preferred()
26 .vortex_expect("VarBinArray must be convertible to arrow array");
27
28 let array = match (&dtype, array_ref.data_type()) {
29 (DType::Utf8(_), DataType::Utf8) => {
30 Arc::new(StringViewArray::from(array_ref.as_string::<i32>()))
31 as Arc<dyn arrow_array::Array>
32 }
33 (DType::Utf8(_), DataType::LargeUtf8) => {
34 Arc::new(StringViewArray::from(array_ref.as_string::<i64>()))
35 as Arc<dyn arrow_array::Array>
36 }
37
38 (DType::Binary(_), DataType::Binary) => {
39 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i32>()))
40 }
41 (DType::Binary(_), DataType::LargeBinary) => {
42 Arc::new(BinaryViewArray::from(array_ref.as_binary::<i64>()))
43 }
44 (DType::Binary(_), DataType::BinaryView) | (DType::Utf8(_), DataType::Utf8View) => {
46 array_ref
47 }
48 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype, instead got: {dtype}",),
49 };
50 Canonical::VarBinView(ArrayRef::from_arrow(array.as_ref(), nullable).to_varbinview())
51 }
52}
53
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_dtype::{DType, Nullability};

    use crate::arrays::varbin::builder::VarBinBuilder;
    use crate::canonical::ToCanonical;

    /// Builds a nullable VarBin array holding two nulls, a 12-byte value and a 13-byte
    /// value, converts it with `to_varbinview`, and checks dtype, validity, inlining and
    /// contents of the result for both Utf8 and Binary dtypes.
    #[rstest]
    #[case(DType::Utf8(Nullability::Nullable))]
    #[case(DType::Binary(Nullability::Nullable))]
    fn test_canonical_varbin(#[case] dtype: DType) {
        // The assertions below expect the 12-byte payload to be stored inline in its view
        // and the 13-byte payload not to be.
        let twelve_bytes = "123456789012".as_bytes();
        let thirteen_bytes = "1234567890123".as_bytes();

        let mut builder = VarBinBuilder::<i32>::with_capacity(10);
        builder.append_null();
        builder.append_null();
        builder.append_value(twelve_bytes);
        builder.append_value(thirteen_bytes);
        let source = builder.finish(dtype.clone());

        let view_array = source.to_varbinview();
        assert_eq!(view_array.dtype(), &dtype);

        // The two leading entries were appended as nulls.
        for null_index in 0..2 {
            assert!(!view_array.is_valid(null_index));
        }

        assert!(view_array.views()[2].is_inlined());
        assert_eq!(view_array.bytes_at(2).as_slice(), twelve_bytes);

        assert!(!view_array.views()[3].is_inlined());
        assert_eq!(view_array.bytes_at(3).as_slice(), thirteen_bytes);
    }
}