use arrow::datatypes::DataType;
/// Converts a v2 dtype string such as `"<i4"` into a canonical type name
/// such as `"int32"`.
///
/// The input is read as an optional leading `<`, `>` or `|` prefix (the
/// byte-order marker, which is ignored), a one-character type code, and a
/// byte width:
///
/// | code | widths mapped                           | any other width |
/// |------|-----------------------------------------|-----------------|
/// | `i`  | 1, 2, 4, 8 → `int8` … `int64`           | `int64`         |
/// | `u`  | 1, 2, 4, 8 → `uint8` … `uint64`         | `uint64`        |
/// | `f`  | 2, 4, 8 → `float16`, `float32`, `float64` | `float64`     |
/// | `b`  | (width ignored) → `bool`                | `bool`          |
///
/// This function never fails; it falls back instead:
/// - if nothing follows the type code (including the empty string), the
///   result is `"float64"`;
/// - an unrecognized type code yields `"float64"`;
/// - a width that does not parse as a `u32` is treated as `8`.
///
/// Arbitrary UTF-8 input is accepted without panicking.
pub fn parse_v2_dtype(dtype: &str) -> String {
    const FALLBACK: &str = "float64";

    let mut rest = dtype.chars();
    let first = match rest.next() {
        Some(c) => c,
        None => return FALLBACK.to_string(),
    };

    // Skip the optional prefix; the type code is then the next character.
    let type_char = if matches!(first, '<' | '>' | '|') {
        match rest.next() {
            Some(c) => c,
            None => return FALLBACK.to_string(),
        }
    } else {
        first
    };

    // Take the remainder from the iterator rather than a fixed byte offset,
    // so the split always lands on a char boundary even when the type code
    // is a multi-byte character.
    let size_str = rest.as_str();
    if size_str.is_empty() {
        return FALLBACK.to_string();
    }
    let size: u32 = size_str.parse().unwrap_or(8);

    let name = match (type_char, size) {
        ('i', 1) => "int8",
        ('i', 2) => "int16",
        ('i', 4) => "int32",
        ('i', _) => "int64",
        ('u', 1) => "uint8",
        ('u', 2) => "uint16",
        ('u', 4) => "uint32",
        ('u', _) => "uint64",
        ('f', 2) => "float16",
        ('f', 4) => "float32",
        ('f', _) => "float64",
        ('b', _) => "bool",
        _ => FALLBACK,
    };
    name.to_string()
}
/// Maps a canonical dtype name (`"int8"`, `"uint32"`, `"float64"`, `"bool"`,
/// …) to the corresponding Arrow [`DataType`].
///
/// Recognized names are `int`/`uint` with widths 8, 16, 32, 64, `float` with
/// widths 16, 32, 64, and `bool`. Any other string yields `DataType::Utf8`.
pub fn zarr_dtype_to_arrow(dtype: &str) -> DataType {
    if dtype == "bool" {
        return DataType::Boolean;
    }

    // Split the name into a family prefix and a bit-width suffix; only exact
    // suffixes are accepted, so the mapping is the same as a flat name lookup.
    let numeric = if let Some(bits) = dtype.strip_prefix("uint") {
        match bits {
            "8" => Some(DataType::UInt8),
            "16" => Some(DataType::UInt16),
            "32" => Some(DataType::UInt32),
            "64" => Some(DataType::UInt64),
            _ => None,
        }
    } else if let Some(bits) = dtype.strip_prefix("int") {
        match bits {
            "8" => Some(DataType::Int8),
            "16" => Some(DataType::Int16),
            "32" => Some(DataType::Int32),
            "64" => Some(DataType::Int64),
            _ => None,
        }
    } else if let Some(bits) = dtype.strip_prefix("float") {
        match bits {
            "16" => Some(DataType::Float16),
            "32" => Some(DataType::Float32),
            "64" => Some(DataType::Float64),
            _ => None,
        }
    } else {
        None
    };

    numeric.unwrap_or(DataType::Utf8)
}
/// Builds an Arrow dictionary [`DataType`] for the given canonical dtype name.
///
/// The dictionary keys are always `Int16`; the value type is whatever
/// `zarr_dtype_to_arrow` returns for `dtype`.
pub fn zarr_dtype_to_arrow_dictionary(dtype: &str) -> DataType {
    let values = Box::new(zarr_dtype_to_arrow(dtype));
    let keys = Box::new(DataType::Int16);
    DataType::Dictionary(keys, values)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_v2_dtype` over each `(input, expected)` pair and reports
    /// the offending input on failure.
    fn assert_parses(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_v2_dtype(input), expected, "input: {:?}", input);
        }
    }

    // Every supported type code and width, with a byte-order prefix.
    #[test]
    fn test_parse_v2_dtype_all_types() {
        assert_parses(&[
            ("<i1", "int8"),
            ("<i2", "int16"),
            ("<i4", "int32"),
            ("<i8", "int64"),
            ("<u1", "uint8"),
            ("<u2", "uint16"),
            ("<u4", "uint32"),
            ("<u8", "uint64"),
            ("<f2", "float16"),
            ("<f4", "float32"),
            ("<f8", "float64"),
            ("|b1", "bool"),
        ]);
    }

    // The `>` prefix is accepted and does not affect the resulting name.
    #[test]
    fn test_parse_v2_dtype_big_endian() {
        assert_parses(&[(">i4", "int32"), (">f8", "float64")]);
    }

    // Inputs too short to carry a width fall back to "float64".
    #[test]
    fn test_parse_v2_dtype_edge_cases() {
        assert_parses(&[
            ("", "float64"),
            ("x", "float64"),
            ("<", "float64"),
            ("<i", "float64"),
        ]);
    }

    // Each canonical name maps to its Arrow type; unknown names map to Utf8.
    #[test]
    fn test_zarr_dtype_to_arrow_all_types() {
        let cases = [
            ("int8", DataType::Int8),
            ("int16", DataType::Int16),
            ("int32", DataType::Int32),
            ("int64", DataType::Int64),
            ("uint8", DataType::UInt8),
            ("uint16", DataType::UInt16),
            ("uint32", DataType::UInt32),
            ("uint64", DataType::UInt64),
            ("float16", DataType::Float16),
            ("float32", DataType::Float32),
            ("float64", DataType::Float64),
            ("bool", DataType::Boolean),
            ("unknown", DataType::Utf8),
        ];
        for (name, expected) in cases.iter() {
            assert_eq!(&zarr_dtype_to_arrow(name), expected, "dtype: {:?}", name);
        }
    }
}