use arrow::array::*;
use arrow::datatypes::*;
use tokio::io::AsyncWriteExt;
use crate::io::{ClickHouseBytesWrite, ClickHouseWrite};
use crate::{Error, Result, Type};
pub(super) async fn serialize_async<W: ClickHouseWrite>(
type_hint: &Type,
writer: &mut W,
values: &ArrayRef,
) -> Result<()> {
match type_hint.strip_null() {
Type::Enum8(pairs) => write_enum8_values(values, writer, pairs).await?,
Type::Enum16(pairs) => write_enum16_values(values, writer, pairs).await?,
_ => {
return Err(Error::ArrowSerialize(format!("Unsupported data type: {type_hint:?}")));
}
}
Ok(())
}
pub(super) fn serialize<W: ClickHouseBytesWrite>(
type_hint: &Type,
writer: &mut W,
values: &ArrayRef,
) -> Result<()> {
match type_hint.strip_null() {
Type::Enum8(pairs) => put_enum8_values(values, writer, pairs)?,
Type::Enum16(pairs) => put_enum16_values(values, writer, pairs)?,
_ => {
return Err(Error::ArrowSerialize(format!("Unsupported data type: {type_hint:?}")));
}
}
Ok(())
}
/// Generates an `async fn` that writes an Arrow column as enum discriminants.
///
/// Macro arguments, in order:
/// * `$name` – name of the generated function.
/// * `enum $pt` – integer type of the discriminant stored next to each enum name.
/// * `$write_fn` – async writer method invoked once per row with a `$pt`.
/// * `[$kt, ..]` – Arrow key types probed for `DictionaryArray` input.
/// * `[$at, ..]` – Arrow primitive types probed for `PrimitiveArray` input.
/// * `[$st, ..]` – offset types probed for offset-based string and binary arrays.
///
/// The generated function probes the column's concrete type in this order: dictionary
/// arrays, primitive arrays, offset-based string/binary arrays, string-view arrays and
/// binary-view arrays. The first match is serialized; any other array type yields
/// `Error::ArrowSerialize`.
///
/// Null slots are encoded as `0`. In the primitive path that `0` is still validated
/// against the enum, unlike the dictionary and string paths.
/// NOTE(review): confirm whether that asymmetry is intended.
macro_rules! write_enum_values {
    ($name:ident, enum $pt:ty, $write_fn:ident, [$($kt:ty),*], [$($at:ty),*], [$($st:ty),*]) => {
        // One branch is expanded per accepted Arrow type, so the body is long by design.
        #[allow(clippy::too_many_lines)]
        async fn $name<W: ClickHouseWrite>(
            column: &::arrow::array::ArrayRef,
            writer: &mut W,
            enum_values: &[(String, $pt)],
        ) -> Result<()> {
            // Dictionary input: keys index into `enum_values`.
            $(
                if let Some(array) = column.as_any().downcast_ref::<DictionaryArray<$kt>>() {
                    let keys = array.keys();
                    let values = array
                        .values()
                        .as_any()
                        .downcast_ref::<StringArray>()
                        .ok_or_else(|| {
                            Error::ArrowSerialize("Enum values must be strings".into())
                        })?;
                    if values.len() != enum_values.len() {
                        return Err(Error::ArrowSerialize(format!(
                            "Enum value count mismatch: {} vs {}",
                            values.len(), enum_values.len()
                        )));
                    }
                    // The dictionary must list the enum names in declaration order so
                    // that a key can be used directly as an index into `enum_values`.
                    for i in 0..values.len() {
                        let dict_val = values.value(i);
                        let enum_val = &enum_values[i].0;
                        if dict_val != enum_val {
                            return Err(Error::ArrowSerialize(format!(
                                "Enum value mismatch at index {i}: '{dict_val}' vs '{enum_val}'"
                            )));
                        }
                    }
                    for i in 0..keys.len() {
                        let value = if keys.is_null(i) {
                            0
                        } else {
                            let key = keys.value(i);
                            // Checked conversion rejects negative keys and keys that do
                            // not fit in `usize` without relying on lossy `as` casts.
                            let index = <usize as ::std::convert::TryFrom<_>>::try_from(key)
                                .ok()
                                .filter(|index| *index < enum_values.len())
                                .ok_or_else(|| {
                                    Error::ArrowSerialize(
                                        format!("Dictionary key {key} out of bounds")
                                    )
                                })?;
                            enum_values[index].1
                        };
                        writer.$write_fn(value).await?;
                    }
                    return Ok(());
                }
            )*
            // Primitive input: each element is already a discriminant.
            $(
                if let Some(array) = column.as_any().downcast_ref::<PrimitiveArray<$at>>() {
                    // Collect the legal discriminants once instead of scanning per row.
                    let known: ::std::collections::HashSet<$pt> =
                        enum_values.iter().map(|(_, v)| *v).collect();
                    for i in 0..array.len() {
                        let value: $pt = if array.is_null(i) {
                            0
                        } else {
                            let raw = array.value(i);
                            // Checked narrowing: a plain `as` cast would wrap an
                            // out-of-range integer (e.g. 257 -> 1 for `i8`) onto a
                            // possibly valid discriminant and write the wrong value.
                            <$pt as ::std::convert::TryFrom<_>>::try_from(raw).map_err(|_| {
                                Error::ArrowSerialize(
                                    format!("Value {raw} not found in enum")
                                )
                            })?
                        };
                        if !known.contains(&value) {
                            return Err(Error::ArrowSerialize(
                                format!("Value {value} not found in enum")
                            ));
                        }
                        writer.$write_fn(value).await?;
                    }
                    return Ok(());
                }
            )*
            // Offset-based string / binary input: each element is an enum name.
            $(
                if let Some(array) = column.as_string_opt::<$st>() {
                    let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                        .iter()
                        .map(|(s, v)| (s.as_str(), *v))
                        .collect();
                    for i in 0..array.len() {
                        if array.is_null(i) {
                            writer.$write_fn(0).await?;
                        } else {
                            let value = array.value(i);
                            // `ok_or_else` keeps the error string off the success path.
                            let key = value_map.get(value).copied().ok_or_else(|| {
                                Error::ArrowSerialize(format!(
                                    "String '{value}' not in enum"
                                ))
                            })?;
                            writer.$write_fn(key).await?;
                        }
                    }
                    return Ok(());
                }
                if let Some(array) = column.as_binary_opt::<$st>() {
                    let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                        .iter()
                        .map(|(s, v)| (s.as_str(), *v))
                        .collect();
                    for i in 0..array.len() {
                        if array.is_null(i) {
                            writer.$write_fn(0).await?;
                        } else {
                            let value = array.value(i);
                            // Binary payloads must be valid UTF-8 to match an enum name.
                            let value_str = ::std::str::from_utf8(value)?;
                            let key = value_map.get(value_str).copied().ok_or_else(|| {
                                Error::ArrowSerialize(format!(
                                    "String '{value_str}' not in enum"
                                ))
                            })?;
                            writer.$write_fn(key).await?;
                        }
                    }
                    return Ok(());
                }
            )*
            // View-based string input.
            if let Some(array) = column.as_string_view_opt() {
                let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                    .iter()
                    .map(|(s, v)| (s.as_str(), *v))
                    .collect();
                for i in 0..array.len() {
                    if array.is_null(i) {
                        writer.$write_fn(0).await?;
                    } else {
                        let value = array.value(i);
                        let key = value_map.get(value).copied().ok_or_else(|| {
                            Error::ArrowSerialize(format!(
                                "String '{value}' not in enum"
                            ))
                        })?;
                        writer.$write_fn(key).await?;
                    }
                }
                return Ok(());
            }
            // View-based binary input.
            if let Some(array) = column.as_binary_view_opt() {
                let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                    .iter()
                    .map(|(s, v)| (s.as_str(), *v))
                    .collect();
                for i in 0..array.len() {
                    if array.is_null(i) {
                        writer.$write_fn(0).await?;
                    } else {
                        let value = array.value(i);
                        let value_str = ::std::str::from_utf8(value)?;
                        let key = value_map.get(value_str).copied().ok_or_else(|| {
                            Error::ArrowSerialize(format!(
                                "String '{value_str}' not in enum"
                            ))
                        })?;
                        writer.$write_fn(key).await?;
                    }
                }
                return Ok(());
            }
            Err(Error::ArrowSerialize(format!(
                "Expected DictionaryArray, PrimitiveArray, StringArray, or BinaryArray, got {:?}",
                column.data_type()
            )))
        }
    };
}
/// Generates a synchronous function that writes an Arrow column as enum discriminants.
///
/// Macro arguments, in order:
/// * `$name` – name of the generated function.
/// * `enum $pt` – integer type of the discriminant stored next to each enum name.
/// * `$write_fn` – writer method invoked once per row with a `$pt`.
/// * `[$kt, ..]` – Arrow key types probed for `DictionaryArray` input.
/// * `[$at, ..]` – Arrow primitive types probed for `PrimitiveArray` input.
/// * `[$st, ..]` – offset types probed for offset-based string and binary arrays.
///
/// The generated function probes the column's concrete type in this order: dictionary
/// arrays, primitive arrays, offset-based string/binary arrays, string-view arrays and
/// binary-view arrays. The first match is serialized; any other array type yields
/// `Error::ArrowSerialize`.
///
/// Null slots are encoded as `0`. In the primitive path that `0` is still validated
/// against the enum, unlike the dictionary and string paths.
/// NOTE(review): confirm whether that asymmetry is intended.
macro_rules! put_enum_values {
    ($name:ident, enum $pt:ty, $write_fn:ident, [$($kt:ty),*], [$($at:ty),*], [$($st:ty),*]) => {
        // One branch is expanded per accepted Arrow type, so the body is long by design.
        #[allow(clippy::too_many_lines)]
        fn $name<W: $crate::io::ClickHouseBytesWrite>(
            column: &::arrow::array::ArrayRef,
            writer: &mut W,
            enum_values: &[(String, $pt)],
        ) -> Result<()> {
            // Dictionary input: keys index into `enum_values`.
            $(
                if let Some(array) = column.as_any().downcast_ref::<DictionaryArray<$kt>>() {
                    let keys = array.keys();
                    let values = array
                        .values()
                        .as_any()
                        .downcast_ref::<StringArray>()
                        .ok_or_else(|| {
                            Error::ArrowSerialize("Enum values must be strings".into())
                        })?;
                    if values.len() != enum_values.len() {
                        return Err(Error::ArrowSerialize(format!(
                            "Enum value count mismatch: {} vs {}",
                            values.len(), enum_values.len()
                        )));
                    }
                    // The dictionary must list the enum names in declaration order so
                    // that a key can be used directly as an index into `enum_values`.
                    for i in 0..values.len() {
                        let dict_val = values.value(i);
                        let enum_val = &enum_values[i].0;
                        if dict_val != enum_val {
                            return Err(Error::ArrowSerialize(format!(
                                "Enum value mismatch at index {i}: '{dict_val}' vs '{enum_val}'"
                            )));
                        }
                    }
                    for i in 0..keys.len() {
                        let value = if keys.is_null(i) {
                            0
                        } else {
                            let key = keys.value(i);
                            // Checked conversion rejects negative keys and keys that do
                            // not fit in `usize` without relying on lossy `as` casts.
                            let index = <usize as ::std::convert::TryFrom<_>>::try_from(key)
                                .ok()
                                .filter(|index| *index < enum_values.len())
                                .ok_or_else(|| {
                                    Error::ArrowSerialize(
                                        format!("Dictionary key {key} out of bounds")
                                    )
                                })?;
                            enum_values[index].1
                        };
                        writer.$write_fn(value);
                    }
                    return Ok(());
                }
            )*
            // Primitive input: each element is already a discriminant.
            $(
                if let Some(array) = column.as_any().downcast_ref::<PrimitiveArray<$at>>() {
                    // Collect the legal discriminants once instead of scanning per row.
                    let known: ::std::collections::HashSet<$pt> =
                        enum_values.iter().map(|(_, v)| *v).collect();
                    for i in 0..array.len() {
                        let value: $pt = if array.is_null(i) {
                            0
                        } else {
                            let raw = array.value(i);
                            // Checked narrowing: a plain `as` cast would wrap an
                            // out-of-range integer (e.g. 257 -> 1 for `i8`) onto a
                            // possibly valid discriminant and write the wrong value.
                            <$pt as ::std::convert::TryFrom<_>>::try_from(raw).map_err(|_| {
                                Error::ArrowSerialize(
                                    format!("Value {raw} not found in enum")
                                )
                            })?
                        };
                        if !known.contains(&value) {
                            return Err(Error::ArrowSerialize(
                                format!("Value {value} not found in enum")
                            ));
                        }
                        writer.$write_fn(value);
                    }
                    return Ok(());
                }
            )*
            // Offset-based string / binary input: each element is an enum name.
            $(
                if let Some(array) = column.as_string_opt::<$st>() {
                    let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                        .iter()
                        .map(|(s, v)| (s.as_str(), *v))
                        .collect();
                    for i in 0..array.len() {
                        if array.is_null(i) {
                            writer.$write_fn(0);
                        } else {
                            let value = array.value(i);
                            // `ok_or_else` keeps the error string off the success path.
                            let key = value_map.get(value).copied().ok_or_else(|| {
                                Error::ArrowSerialize(format!(
                                    "String '{value}' not in enum"
                                ))
                            })?;
                            writer.$write_fn(key);
                        }
                    }
                    return Ok(());
                }
                if let Some(array) = column.as_binary_opt::<$st>() {
                    let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                        .iter()
                        .map(|(s, v)| (s.as_str(), *v))
                        .collect();
                    for i in 0..array.len() {
                        if array.is_null(i) {
                            writer.$write_fn(0);
                        } else {
                            let value = array.value(i);
                            // Binary payloads must be valid UTF-8 to match an enum name.
                            let value_str = ::std::str::from_utf8(value)?;
                            let key = value_map.get(value_str).copied().ok_or_else(|| {
                                Error::ArrowSerialize(format!(
                                    "String '{value_str}' not in enum"
                                ))
                            })?;
                            writer.$write_fn(key);
                        }
                    }
                    return Ok(());
                }
            )*
            // View-based string input.
            if let Some(array) = column.as_string_view_opt() {
                let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                    .iter()
                    .map(|(s, v)| (s.as_str(), *v))
                    .collect();
                for i in 0..array.len() {
                    if array.is_null(i) {
                        writer.$write_fn(0);
                    } else {
                        let value = array.value(i);
                        let key = value_map.get(value).copied().ok_or_else(|| {
                            Error::ArrowSerialize(format!(
                                "String '{value}' not in enum"
                            ))
                        })?;
                        writer.$write_fn(key);
                    }
                }
                return Ok(());
            }
            // View-based binary input.
            if let Some(array) = column.as_binary_view_opt() {
                let value_map: ::std::collections::HashMap<&str, $pt> = enum_values
                    .iter()
                    .map(|(s, v)| (s.as_str(), *v))
                    .collect();
                for i in 0..array.len() {
                    if array.is_null(i) {
                        writer.$write_fn(0);
                    } else {
                        let value = array.value(i);
                        let value_str = ::std::str::from_utf8(value)?;
                        let key = value_map.get(value_str).copied().ok_or_else(|| {
                            Error::ArrowSerialize(format!(
                                "String '{value_str}' not in enum"
                            ))
                        })?;
                        writer.$write_fn(key);
                    }
                }
                return Ok(());
            }
            Err(Error::ArrowSerialize(format!(
                "Expected DictionaryArray, PrimitiveArray, StringArray, or BinaryArray, got {:?}",
                column.data_type()
            )))
        }
    };
}
// Async Enum8 writer `write_enum8_values`: discriminants are `i8`, emitted with `write_i8`.
// The three bracketed lists are, in order: dictionary key types, primitive array types,
// and string/binary offset types that the generated function accepts.
write_enum_values!(
write_enum8_values,
enum i8,
write_i8,
[Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[i32, i64]
);
// Async Enum16 writer `write_enum16_values`: discriminants are `i16`, emitted with
// `write_i16_le`. `Int16Type` is listed first, so it is the first type probed.
write_enum_values!(
write_enum16_values,
enum i16,
write_i16_le,
[Int16Type, Int8Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[Int16Type, Int8Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[i32, i64]
);
// Synchronous Enum8 writer `put_enum8_values`: discriminants are `i8`, emitted with `put_i8`.
put_enum_values!(
put_enum8_values,
enum i8,
put_i8,
[Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[i32, i64]
);
// Synchronous Enum16 writer `put_enum16_values`: discriminants are `i16`, emitted with
// `put_i16_le`. `Int16Type` is listed first, so it is the first type probed.
put_enum_values!(
put_enum16_values,
enum i16,
put_i16_le,
[Int16Type, Int8Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[Int16Type, Int8Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type],
[i32, i64]
);
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{DictionaryArray, Int8Array, Int16Array, StringArray};
    use arrow::datatypes::{Int8Type, Int16Type};

    use super::*;

    type MockWriter = Vec<u8>;

    /// Enum8 definition shared by most cases: `'a' = 1, 'b' = 2`.
    fn ab_enum8() -> Type {
        Type::Enum8(vec![("a".to_string(), 1_i8), ("b".to_string(), 2_i8)])
    }

    /// Wraps `keys` and `dictionary` into an `Int8`-keyed dictionary column.
    fn int8_dictionary(keys: Vec<i8>, dictionary: ArrayRef) -> ArrayRef {
        Arc::new(DictionaryArray::<Int8Type>::try_new(Int8Array::from(keys), dictionary).unwrap())
    }

    /// Runs `serialize_async` against a fresh in-memory writer and returns the bytes.
    async fn encode(type_hint: &Type, column: &ArrayRef) -> Result<MockWriter> {
        let mut sink = MockWriter::new();
        serialize_async(type_hint, &mut sink, column).await?;
        Ok(sink)
    }

    #[tokio::test]
    async fn test_serialize_enum8_dictionary() {
        let column = int8_dictionary(vec![0, 1, 0], Arc::new(StringArray::from(vec!["a", "b"])));
        assert_eq!(encode(&ab_enum8(), &column).await.unwrap(), vec![1, 2, 1]);
    }

    #[tokio::test]
    async fn test_serialize_enum8_primitive() {
        let column: ArrayRef = Arc::new(Int8Array::from(vec![1, 2, 1]));
        assert_eq!(encode(&ab_enum8(), &column).await.unwrap(), vec![1, 2, 1]);
    }

    #[tokio::test]
    async fn test_serialize_enum8_string() {
        let column: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a"]));
        assert_eq!(encode(&ab_enum8(), &column).await.unwrap(), vec![1, 2, 1]);
    }

    #[tokio::test]
    async fn test_serialize_enum8_nullable() {
        // The null slot is expected to come out as a zero byte.
        let column: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), None, Some("a")]));
        assert_eq!(encode(&ab_enum8(), &column).await.unwrap(), vec![1, 0, 1]);
    }

    #[tokio::test]
    async fn test_serialize_enum16_dictionary() {
        let type_hint = Type::Enum16(vec![("x".to_string(), 10_i16), ("y".to_string(), 20_i16)]);
        let keys = Int16Array::from(vec![0, 1, 0]);
        let dictionary = Arc::new(StringArray::from(vec!["x", "y"]));
        let column: ArrayRef =
            Arc::new(DictionaryArray::<Int16Type>::try_new(keys, dictionary).unwrap());
        // Two bytes per value, low byte first.
        assert_eq!(encode(&type_hint, &column).await.unwrap(), vec![10, 0, 20, 0, 10, 0]);
    }

    #[tokio::test]
    async fn test_serialize_enum8_empty() {
        let column: ArrayRef = Arc::new(Int8Array::from(Vec::<i8>::new()));
        assert!(encode(&ab_enum8(), &column).await.unwrap().is_empty());
    }

    #[tokio::test]
    async fn test_serialize_enum8_invalid_value() {
        let column: ArrayRef = Arc::new(Int8Array::from(vec![3]));
        let outcome = encode(&ab_enum8(), &column).await;
        assert!(matches!(
            outcome,
            Err(Error::ArrowSerialize(msg))
            if msg.contains("Value 3 not found in enum")
        ));
    }

    #[tokio::test]
    async fn test_serialize_enum8_dictionary_invalid_array() {
        let column: ArrayRef = Arc::new(TimestampSecondArray::from(Vec::<i64>::new()));
        let outcome = encode(&Type::Enum8(vec![]), &column).await;
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[tokio::test]
    async fn test_serialize_enum8_dictionary_invalid_value() {
        // Dictionary values that are not strings must be rejected.
        let dictionary = Arc::new(TimestampSecondArray::from(Vec::<i64>::new()));
        let column = int8_dictionary(Vec::<i8>::new(), dictionary);
        let outcome = encode(&ab_enum8(), &column).await;
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[tokio::test]
    async fn test_serialize_enum8_dictionary_invalid_value_length() {
        let dictionary = Arc::new(StringArray::from(vec!["a", "b", "c"]));
        let column = int8_dictionary(vec![0, 1, 0], dictionary);
        let outcome = encode(&ab_enum8(), &column).await;
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[tokio::test]
    async fn test_serialize_enum16_uint_type_ok() {
        let type_hint = Type::Enum16(vec![("x".to_string(), 10_i16), ("y".to_string(), 20_i16)]);
        let column: ArrayRef = Arc::new(UInt8Array::from(vec![10]));
        assert!(encode(&type_hint, &column).await.is_ok());
    }

    #[tokio::test]
    async fn test_serialize_enum8_negative_values() {
        let type_hint = Type::Enum8(vec![("neg".to_string(), -1_i8), ("pos".to_string(), 1_i8)]);
        let column: ArrayRef = Arc::new(Int8Array::from(vec![-1, 1, -1]));
        // -1 as a single byte is 255.
        assert_eq!(encode(&type_hint, &column).await.unwrap(), vec![255, 1, 255]);
    }

    #[tokio::test]
    async fn test_serialize_enum16_sparse_values() {
        let type_hint = Type::Enum16(vec![("a".to_string(), 100_i16), ("b".to_string(), 200_i16)]);
        let column: ArrayRef = Arc::new(Int16Array::from(vec![100, 200, 100]));
        assert_eq!(encode(&type_hint, &column).await.unwrap(), vec![100, 0, 200, 0, 100, 0]);
    }

    #[tokio::test]
    async fn test_serialize_enum8_dictionary_wrong_order() {
        // Same names as the enum, but listed in the opposite order.
        let dictionary = Arc::new(StringArray::from(vec!["b", "a"]));
        let column = int8_dictionary(vec![0, 1, 0], dictionary);
        let outcome = encode(&ab_enum8(), &column).await;
        assert!(matches!(
            outcome,
            Err(Error::ArrowSerialize(msg))
            if msg.contains("Enum value mismatch")
        ));
    }

    #[tokio::test]
    async fn test_write_enum8_string_like_values() {
        let cases = vec![
            Arc::new(StringArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(StringViewArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(LargeStringArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(BinaryArray::from_opt_vec(vec![Some(b"a"), Some(b"b"), None])) as ArrayRef,
            Arc::new(BinaryViewArray::from(vec![Some(b"a" as &[u8]), Some(b"b"), None]))
                as ArrayRef,
            Arc::new(LargeBinaryArray::from_opt_vec(vec![Some(b"a"), Some(b"b"), None]))
                as ArrayRef,
        ];
        let type_hint = ab_enum8();
        for column in &cases {
            assert_eq!(encode(&type_hint, column).await.unwrap(), vec![1, 2, 0]);
        }
    }

    #[tokio::test]
    async fn test_serialize_enum_wrong_type() {
        let column: ArrayRef = Arc::new(StringArray::from(Vec::<String>::new()));
        let outcome = encode(&Type::String, &column).await;
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }
}
#[cfg(test)]
mod tests_sync {
    use std::sync::Arc;

    use arrow::array::{DictionaryArray, Int8Array, Int16Array, StringArray};
    use arrow::datatypes::{Int8Type, Int16Type};

    use super::*;

    type MockWriter = Vec<u8>;

    /// Enum8 definition shared by most cases: `'a' = 1, 'b' = 2`.
    fn ab_enum8() -> Type {
        Type::Enum8(vec![("a".to_string(), 1_i8), ("b".to_string(), 2_i8)])
    }

    /// Wraps `keys` and `dictionary` into an `Int8`-keyed dictionary column.
    fn int8_dictionary(keys: Vec<i8>, dictionary: ArrayRef) -> ArrayRef {
        Arc::new(DictionaryArray::<Int8Type>::try_new(Int8Array::from(keys), dictionary).unwrap())
    }

    /// Runs `serialize` against a fresh in-memory writer and returns the bytes.
    fn encode(type_hint: &Type, column: &ArrayRef) -> Result<MockWriter> {
        let mut sink = MockWriter::new();
        serialize(type_hint, &mut sink, column)?;
        Ok(sink)
    }

    #[test]
    fn test_serialize_enum8_dictionary() {
        let column = int8_dictionary(vec![0, 1, 0], Arc::new(StringArray::from(vec!["a", "b"])));
        assert_eq!(encode(&ab_enum8(), &column).unwrap(), vec![1, 2, 1]);
    }

    #[test]
    fn test_serialize_enum8_primitive() {
        let column: ArrayRef = Arc::new(Int8Array::from(vec![1, 2, 1]));
        assert_eq!(encode(&ab_enum8(), &column).unwrap(), vec![1, 2, 1]);
    }

    #[test]
    fn test_serialize_enum8_string() {
        let column: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a"]));
        assert_eq!(encode(&ab_enum8(), &column).unwrap(), vec![1, 2, 1]);
    }

    #[test]
    fn test_serialize_enum8_nullable() {
        // The null slot is expected to come out as a zero byte.
        let column: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), None, Some("a")]));
        assert_eq!(encode(&ab_enum8(), &column).unwrap(), vec![1, 0, 1]);
    }

    #[test]
    fn test_serialize_enum16_dictionary() {
        let type_hint = Type::Enum16(vec![("x".to_string(), 10_i16), ("y".to_string(), 20_i16)]);
        let keys = Int16Array::from(vec![0, 1, 0]);
        let dictionary = Arc::new(StringArray::from(vec!["x", "y"]));
        let column: ArrayRef =
            Arc::new(DictionaryArray::<Int16Type>::try_new(keys, dictionary).unwrap());
        // Two bytes per value, low byte first.
        assert_eq!(encode(&type_hint, &column).unwrap(), vec![10, 0, 20, 0, 10, 0]);
    }

    #[test]
    fn test_serialize_enum8_empty() {
        let column: ArrayRef = Arc::new(Int8Array::from(Vec::<i8>::new()));
        assert!(encode(&ab_enum8(), &column).unwrap().is_empty());
    }

    #[test]
    fn test_serialize_enum8_invalid_value() {
        let column: ArrayRef = Arc::new(Int8Array::from(vec![3]));
        let outcome = encode(&ab_enum8(), &column);
        assert!(matches!(
            outcome,
            Err(Error::ArrowSerialize(msg))
            if msg.contains("Value 3 not found in enum")
        ));
    }

    #[test]
    fn test_serialize_enum8_dictionary_invalid_array() {
        let column: ArrayRef = Arc::new(TimestampSecondArray::from(Vec::<i64>::new()));
        let outcome = encode(&Type::Enum8(vec![]), &column);
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[test]
    fn test_serialize_enum8_dictionary_invalid_value() {
        // Dictionary values that are not strings must be rejected.
        let dictionary = Arc::new(TimestampSecondArray::from(Vec::<i64>::new()));
        let column = int8_dictionary(Vec::<i8>::new(), dictionary);
        let outcome = encode(&ab_enum8(), &column);
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[test]
    fn test_serialize_enum8_dictionary_invalid_value_length() {
        let dictionary = Arc::new(StringArray::from(vec!["a", "b", "c"]));
        let column = int8_dictionary(vec![0, 1, 0], dictionary);
        let outcome = encode(&ab_enum8(), &column);
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }

    #[test]
    fn test_serialize_enum16_uint_type_ok() {
        let type_hint = Type::Enum16(vec![("x".to_string(), 10_i16), ("y".to_string(), 20_i16)]);
        let column: ArrayRef = Arc::new(UInt8Array::from(vec![10]));
        assert!(encode(&type_hint, &column).is_ok());
    }

    #[test]
    fn test_serialize_enum8_negative_values() {
        let type_hint = Type::Enum8(vec![("neg".to_string(), -1_i8), ("pos".to_string(), 1_i8)]);
        let column: ArrayRef = Arc::new(Int8Array::from(vec![-1, 1, -1]));
        // -1 as a single byte is 255.
        assert_eq!(encode(&type_hint, &column).unwrap(), vec![255, 1, 255]);
    }

    #[test]
    fn test_serialize_enum16_sparse_values() {
        let type_hint = Type::Enum16(vec![("a".to_string(), 100_i16), ("b".to_string(), 200_i16)]);
        let column: ArrayRef = Arc::new(Int16Array::from(vec![100, 200, 100]));
        assert_eq!(encode(&type_hint, &column).unwrap(), vec![100, 0, 200, 0, 100, 0]);
    }

    #[test]
    fn test_serialize_enum8_dictionary_wrong_order() {
        // Same names as the enum, but listed in the opposite order.
        let dictionary = Arc::new(StringArray::from(vec!["b", "a"]));
        let column = int8_dictionary(vec![0, 1, 0], dictionary);
        let outcome = encode(&ab_enum8(), &column);
        assert!(matches!(
            outcome,
            Err(Error::ArrowSerialize(msg))
            if msg.contains("Enum value mismatch")
        ));
    }

    #[test]
    fn test_write_enum8_string_like_values() {
        let cases = vec![
            Arc::new(StringArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(StringViewArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(LargeStringArray::from(vec![Some("a"), Some("b"), None])) as ArrayRef,
            Arc::new(BinaryArray::from_opt_vec(vec![Some(b"a"), Some(b"b"), None])) as ArrayRef,
            Arc::new(BinaryViewArray::from(vec![Some(b"a" as &[u8]), Some(b"b"), None]))
                as ArrayRef,
            Arc::new(LargeBinaryArray::from_opt_vec(vec![Some(b"a"), Some(b"b"), None]))
                as ArrayRef,
        ];
        let type_hint = ab_enum8();
        for column in &cases {
            assert_eq!(encode(&type_hint, column).unwrap(), vec![1, 2, 0]);
        }
    }

    #[test]
    fn test_serialize_enum_wrong_type() {
        let column: ArrayRef = Arc::new(StringArray::from(Vec::<String>::new()));
        let outcome = encode(&Type::String, &column);
        assert!(matches!(outcome, Err(Error::ArrowSerialize(_))));
    }
}