zarr_datafusion/reader/
dtype.rs1use arrow::datatypes::DataType;
6
/// Converts a Zarr v2 (NumPy-style) dtype string such as `"<i4"` or `"|b1"`
/// into a canonical type name such as `"int32"` or `"bool"`.
///
/// The expected layout is an optional one-character prefix (`<`, `>` or `|`),
/// a type character (`i`, `u`, `f` or `b`) and a decimal byte size. The prefix
/// is skipped and has no influence on the returned name.
///
/// This function never panics, including on non-ASCII input. Fallbacks:
/// * missing type character or missing size text -> `"float64"`
/// * unrecognised type character -> `"float64"`
/// * size that is not a number, or not a width listed for the type -> the
///   8-byte variant of that type (`"int64"`, `"uint64"`, `"float64"`)
/// * `b` always maps to `"bool"`, whatever the size text is
pub fn parse_v2_dtype(dtype: &str) -> String {
    const FALLBACK: &str = "float64";

    // Drop at most one leading prefix character; `strip_prefix` works on
    // char boundaries, so no byte-offset arithmetic is needed.
    let body = dtype
        .strip_prefix(|c: char| matches!(c, '<' | '>' | '|'))
        .unwrap_or(dtype);

    // Split into the type character and the remaining size text. Walking the
    // `Chars` iterator keeps the split on a char boundary even when the type
    // character is multi-byte (byte slicing would panic there).
    let mut rest = body.chars();
    let type_char = match rest.next() {
        Some(c) => c,
        None => return FALLBACK.to_string(),
    };
    let size_str = rest.as_str();
    if size_str.is_empty() {
        return FALLBACK.to_string();
    }

    // Unparseable size text is treated as 8 bytes.
    let size: u32 = size_str.parse().unwrap_or(8);

    let name = match (type_char, size) {
        ('i', 1) => "int8",
        ('i', 2) => "int16",
        ('i', 4) => "int32",
        ('i', _) => "int64",
        ('u', 1) => "uint8",
        ('u', 2) => "uint16",
        ('u', 4) => "uint32",
        ('u', _) => "uint64",
        ('f', 2) => "float16",
        ('f', 4) => "float32",
        ('f', _) => "float64",
        ('b', _) => "bool",
        _ => FALLBACK,
    };
    name.to_string()
}
57
58pub fn zarr_dtype_to_arrow(dtype: &str) -> DataType {
60 match dtype {
61 "int8" => DataType::Int8,
62 "int16" => DataType::Int16,
63 "int32" => DataType::Int32,
64 "int64" => DataType::Int64,
65 "uint8" => DataType::UInt8,
66 "uint16" => DataType::UInt16,
67 "uint32" => DataType::UInt32,
68 "uint64" => DataType::UInt64,
69 "float16" => DataType::Float16,
70 "float32" => DataType::Float32,
71 "float64" => DataType::Float64,
72 "bool" => DataType::Boolean,
73 _ => DataType::Utf8,
74 }
75}
76
77pub fn zarr_dtype_to_arrow_dictionary(dtype: &str) -> DataType {
80 let value_type = zarr_dtype_to_arrow(dtype);
81 DataType::Dictionary(Box::new(DataType::Int16), Box::new(value_type))
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `parse_v2_dtype` yields `expected` for each `(input, expected)` pair.
    fn check_v2(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_v2_dtype(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_parse_v2_dtype_all_types() {
        check_v2(&[
            ("<i1", "int8"),
            ("<i2", "int16"),
            ("<i4", "int32"),
            ("<i8", "int64"),
            ("<u1", "uint8"),
            ("<u2", "uint16"),
            ("<u4", "uint32"),
            ("<u8", "uint64"),
            ("<f2", "float16"),
            ("<f4", "float32"),
            ("<f8", "float64"),
            ("|b1", "bool"),
        ]);
    }

    #[test]
    fn test_parse_v2_dtype_big_endian() {
        check_v2(&[(">i4", "int32"), (">f8", "float64")]);
    }

    #[test]
    fn test_parse_v2_dtype_edge_cases() {
        // Inputs too short to carry both a type character and a size.
        check_v2(&[
            ("", "float64"),
            ("x", "float64"),
            ("<", "float64"),
            ("<i", "float64"),
        ]);
    }

    #[test]
    fn test_zarr_dtype_to_arrow_all_types() {
        let cases = [
            ("int8", DataType::Int8),
            ("int16", DataType::Int16),
            ("int32", DataType::Int32),
            ("int64", DataType::Int64),
            ("uint8", DataType::UInt8),
            ("uint16", DataType::UInt16),
            ("uint32", DataType::UInt32),
            ("uint64", DataType::UInt64),
            ("float16", DataType::Float16),
            ("float32", DataType::Float32),
            ("float64", DataType::Float64),
            ("bool", DataType::Boolean),
            // Names outside the known set map to Utf8.
            ("unknown", DataType::Utf8),
        ];
        for (name, expected) in cases.iter() {
            assert_eq!(zarr_dtype_to_arrow(name), *expected, "dtype: {:?}", name);
        }
    }
}