use arrow::datatypes::{Field, Schema};
use std::fs;
use std::path::Path;
use tracing::{debug, info, instrument};
use super::dtype::{parse_v2_dtype, zarr_dtype_to_arrow, zarr_dtype_to_arrow_dictionary};
/// Zarr storage format version.
///
/// V2 stores are marked by `.zgroup`/`.zarray` metadata files; V3 stores by
/// `zarr.json`. `Eq` is derived alongside `PartialEq` since the comparison is
/// total (clippy: `derive_partial_eq_without_eq`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZarrVersion {
    V2,
    V3,
}
/// Detect whether the store at `store_path` is Zarr v2 or v3 by probing for
/// version-specific metadata files: first at the store root, then one level
/// down inside each child directory.
///
/// Returns an error when `store_path` cannot be listed or no metadata file is
/// found at either level.
pub fn detect_zarr_version(
    store_path: &str,
) -> Result<ZarrVersion, Box<dyn std::error::Error + Send + Sync>> {
    let root = Path::new(store_path);
    // Root-level markers win: `zarr.json` means v3, `.zgroup`/`.zarray` mean v2.
    if root.join("zarr.json").exists() {
        return Ok(ZarrVersion::V3);
    }
    if [".zgroup", ".zarray"].iter().any(|m| root.join(m).exists()) {
        return Ok(ZarrVersion::V2);
    }
    // No root metadata: probe the immediate child directories (array nodes).
    for entry in fs::read_dir(root)? {
        let child = entry?.path();
        if !child.is_dir() {
            continue;
        }
        if child.join("zarr.json").exists() {
            return Ok(ZarrVersion::V3);
        }
        if child.join(".zarray").exists() {
            return Ok(ZarrVersion::V2);
        }
    }
    Err("Could not detect Zarr version: no metadata files found".into())
}
/// Metadata for a single Zarr array (one coordinate or one data variable).
#[derive(Debug, Clone)]
pub struct ZarrArrayMeta {
    // Array name — the directory name inside the store.
    pub name: String,
    // Data type string, normalized to v3-style names (e.g. "float64").
    pub data_type: String,
    // Per-dimension sizes.
    pub shape: Vec<u64>,
    // Cached (min, max) of the coordinate values, when computed.
    pub coord_min_max: Option<(f64, f64)>,
}

impl ZarrArrayMeta {
    /// A one-dimensional array is treated as a coordinate; anything else
    /// (including zero-dimensional scalars) is a data variable.
    pub fn is_coordinate(&self) -> bool {
        matches!(self.shape.as_slice(), [_])
    }
}
/// Discovered layout of a Zarr store: 1-D coordinate arrays plus N-D data
/// variables.
#[derive(Debug, Clone)]
pub struct ZarrStoreMeta {
    // Coordinate arrays, ordered to match the data variables' dimensions.
    pub coords: Vec<ZarrArrayMeta>,
    // Data variables, sorted by name.
    pub data_vars: Vec<ZarrArrayMeta>,
    // Product of the coordinate lengths — the row count of the expanded table.
    pub total_rows: usize,
}
/// Discover every array in the store at `store_path`, dispatching on the
/// detected Zarr version.
pub fn discover_arrays(
    store_path: &str,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    match detect_zarr_version(store_path)? {
        ZarrVersion::V2 => discover_arrays_v2(store_path),
        ZarrVersion::V3 => discover_arrays_v3(store_path),
    }
}
/// Discover arrays in a Zarr v2 store by reading the `.zarray` document of
/// each child directory that has one.
fn discover_arrays_v2(
    store_path: &str,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    let mut arrays: Vec<ZarrArrayMeta> = Vec::new();
    for entry in fs::read_dir(Path::new(store_path))? {
        let dir = entry?.path();
        if !dir.is_dir() {
            continue;
        }
        let zarray = dir.join(".zarray");
        if !zarray.exists() {
            continue;
        }
        let meta: serde_json::Value = serde_json::from_str(&fs::read_to_string(&zarray)?)?;
        // The array name is the directory name; non-UTF-8 names fall back to
        // "unknown".
        let name = dir
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown")
            .to_string();
        let shape: Vec<u64> = meta
            .get("shape")
            .and_then(|v| v.as_array())
            .map(|arr| arr.iter().filter_map(|v| v.as_u64()).collect())
            .unwrap_or_default();
        // v2 dtypes are NumPy-style strings such as "<f8"; default to float64.
        let dtype_raw = meta.get("dtype").and_then(|v| v.as_str()).unwrap_or("<f8");
        arrays.push(ZarrArrayMeta {
            name,
            data_type: parse_v2_dtype(dtype_raw),
            shape,
            coord_min_max: None,
        });
    }
    separate_and_sort_arrays(arrays, store_path)
}
/// Discover arrays in a Zarr v3 store by reading each child directory's
/// `zarr.json`, keeping only nodes whose `node_type` is `"array"`.
fn discover_arrays_v3(
    store_path: &str,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    let mut arrays: Vec<ZarrArrayMeta> = Vec::new();
    for entry in fs::read_dir(Path::new(store_path))? {
        let dir = entry?.path();
        if !dir.is_dir() {
            continue;
        }
        let zarr_json = dir.join("zarr.json");
        if !zarr_json.exists() {
            continue;
        }
        let meta: serde_json::Value = serde_json::from_str(&fs::read_to_string(&zarr_json)?)?;
        // Group nodes carry no shape/data_type — skip them.
        if meta.get("node_type").and_then(|v| v.as_str()) != Some("array") {
            continue;
        }
        let name = dir
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown")
            .to_string();
        let shape: Vec<u64> = meta
            .get("shape")
            .and_then(|v| v.as_array())
            .map(|arr| arr.iter().filter_map(|v| v.as_u64()).collect())
            .unwrap_or_default();
        // v3 data types are already plain names like "float64".
        let data_type = meta
            .get("data_type")
            .and_then(|v| v.as_str())
            .unwrap_or("float64")
            .to_string();
        arrays.push(ZarrArrayMeta {
            name,
            data_type,
            shape,
            coord_min_max: None,
        });
    }
    separate_and_sort_arrays(arrays, store_path)
}
/// Read an entire 1-D coordinate array from a filesystem store and return its
/// `(min, max)` as `f64`.
///
/// Returns `None` when the store/array cannot be opened or read, the array is
/// empty, or the data type is unsupported.
fn compute_coord_min_max(
    store_path: &str,
    coord_name: &str,
    data_type: &str,
) -> Option<(f64, f64)> {
    use zarrs::array::Array;
    use zarrs::array_subset::ArraySubset;
    use zarrs::filesystem::FilesystemStore;
    let store = FilesystemStore::new(store_path).ok()?;
    let array_path = format!("/{}", coord_name);
    let array = Array::open(store.into(), &array_path).ok()?;
    let shape = array.shape();
    let subset = ArraySubset::new_with_start_shape(vec![0], shape.to_vec()).ok()?;
    // Floating-point range: fold with IEEE min/max over the whole array.
    // (The per-type match arms were previously copy-pasted eight times; the
    // two macros below keep one implementation per numeric family.)
    macro_rules! float_range {
        ($t:ty) => {{
            let data: Vec<$t> = array.retrieve_array_subset_elements(&subset).ok()?;
            if data.is_empty() {
                return None;
            }
            let min = data.iter().cloned().fold(<$t>::INFINITY, <$t>::min) as f64;
            let max = data.iter().cloned().fold(<$t>::NEG_INFINITY, <$t>::max) as f64;
            Some((min, max))
        }};
    }
    // Integer range via Iterator::min/max, widened to f64 (values beyond 2^53
    // lose precision, acceptable for range metadata).
    macro_rules! int_range {
        ($t:ty) => {{
            let data: Vec<$t> = array.retrieve_array_subset_elements(&subset).ok()?;
            if data.is_empty() {
                return None;
            }
            let min = *data.iter().min()? as f64;
            let max = *data.iter().max()? as f64;
            Some((min, max))
        }};
    }
    match data_type {
        "float64" => float_range!(f64),
        "float32" => float_range!(f32),
        "int64" => int_range!(i64),
        "int32" => int_range!(i32),
        "int16" => int_range!(i16),
        // Narrow integer dtypes added for completeness alongside the existing
        // ones — same code path via the macro.
        "int8" => int_range!(i8),
        "uint64" => int_range!(u64),
        "uint32" => int_range!(u32),
        "uint16" => int_range!(u16),
        "uint8" => int_range!(u8),
        _ => {
            debug!(data_type = %data_type, "Unsupported data type for min/max computation");
            None
        }
    }
}
/// Order coordinates to match the dimension order of the data variables.
///
/// For each data variable whose rank equals the number of coordinates, greedily
/// assign one unused coordinate to each dimension by matching lengths. The
/// first variable for which every dimension finds a match fixes the order;
/// otherwise the coordinates are returned in alphabetical order. Coordinates
/// of equal length are disambiguated only by assignment order.
fn infer_coord_order_from_data_vars(
    mut coords: Vec<ZarrArrayMeta>,
    data_vars: &[ZarrArrayMeta],
) -> Vec<ZarrArrayMeta> {
    if coords.is_empty() || data_vars.is_empty() {
        coords.sort_by(|a, b| a.name.cmp(&b.name));
        return coords;
    }
    // Only variables with one dimension per coordinate can define an order.
    for var in data_vars.iter().filter(|v| v.shape.len() == coords.len()) {
        let mut used = vec![false; coords.len()];
        let mut ordered: Vec<ZarrArrayMeta> = Vec::with_capacity(coords.len());
        for dim_size in &var.shape {
            let hit = (0..coords.len())
                .find(|&j| !used[j] && coords[j].shape.first() == Some(dim_size));
            match hit {
                Some(j) => {
                    used[j] = true;
                    ordered.push(coords[j].clone());
                }
                None => {
                    // Dimension with no matching coordinate: this variable
                    // cannot determine the order.
                    ordered.clear();
                    break;
                }
            }
        }
        if ordered.len() == coords.len() {
            return ordered;
        }
    }
    // Fallback: deterministic alphabetical order.
    coords.sort_by(|a, b| a.name.cmp(&b.name));
    coords
}
fn separate_and_sort_arrays(
arrays: Vec<ZarrArrayMeta>,
store_path: &str,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
let (mut coords, mut data_vars): (Vec<_>, Vec<_>) =
arrays.into_iter().partition(|a| a.is_coordinate());
data_vars.sort_by(|a, b| a.name.cmp(&b.name));
coords = infer_coord_order_from_data_vars(coords, &data_vars);
for coord in &mut coords {
if let Some(min_max) = compute_coord_min_max(store_path, &coord.name, &coord.data_type) {
debug!(
coord = %coord.name,
min = min_max.0,
max = min_max.1,
"Computed coordinate min/max"
);
coord.coord_min_max = Some(min_max);
}
}
let total_rows: usize = coords.iter().map(|c| c.shape[0] as usize).product();
Ok(ZarrStoreMeta {
coords,
data_vars,
total_rows,
})
}
/// Infer an Arrow schema for the store, discarding the discovery metadata.
pub fn infer_schema(store_path: &str) -> Result<Schema, Box<dyn std::error::Error + Send + Sync>> {
    infer_schema_with_meta(store_path).map(|(schema, _meta)| schema)
}
/// Infer an Arrow schema and return it with the discovered store layout.
///
/// Coordinates become non-nullable dictionary-encoded fields; data variables
/// become nullable fields with their native Arrow type. Field order is
/// coordinates first, then data variables.
pub fn infer_schema_with_meta(
    store_path: &str,
) -> Result<(Schema, ZarrStoreMeta), Box<dyn std::error::Error + Send + Sync>> {
    let meta = discover_arrays(store_path)?;
    let coord_fields = meta
        .coords
        .iter()
        .map(|c| Field::new(&c.name, zarr_dtype_to_arrow_dictionary(&c.data_type), false));
    let var_fields = meta
        .data_vars
        .iter()
        .map(|v| Field::new(&v.name, zarr_dtype_to_arrow(&v.data_type), true));
    let fields: Vec<Field> = coord_fields.chain(var_fields).collect();
    Ok((Schema::new(fields), meta))
}
use zarrs::storage::AsyncReadableListableStorage;
use zarrs_object_store::object_store::path::Path as ObjectPath;
/// Async variant of [`discover_arrays`] for object-store-backed Zarr stores.
#[instrument(level = "debug", skip_all)]
pub async fn discover_arrays_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    debug!("Detecting Zarr version");
    let version = detect_zarr_version_async(store, prefix).await?;
    info!(?version, "Zarr version detected");
    let result = match version {
        ZarrVersion::V2 => discover_arrays_v2_async(store, prefix).await,
        ZarrVersion::V3 => discover_arrays_v3_async(store, prefix).await,
    };
    // Summarize what was found (success path only); errors pass through as-is.
    if let Ok(meta) = &result {
        info!(
            coords = meta.coords.len(),
            data_vars = meta.data_vars.len(),
            "Arrays discovered"
        );
        for c in &meta.coords {
            debug!(name = %c.name, shape = ?c.shape, dtype = %c.data_type, "Coordinate");
        }
        for v in &meta.data_vars {
            debug!(name = %v.name, shape = ?v.shape, dtype = %v.data_type, "Data variable");
        }
    }
    result
}
/// Async variant of [`detect_zarr_version`].
///
/// Probes `zarr.json` (v3) then `.zgroup`/`.zarray` (v2) under `prefix`, and
/// falls back to probing the immediate child prefixes.
pub async fn detect_zarr_version_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<ZarrVersion, Box<dyn std::error::Error + Send + Sync>> {
    use zarrs::storage::AsyncListableStorageTraits;
    use zarrs::storage::StorePrefix;
    // Join `prefix` and a metadata file name into a store key. Store keys are
    // relative paths with no leading '/': the previous
    // `format!("{}/zarr.json", prefix)` produced "/zarr.json" for an empty
    // (root) prefix, which can never match a stored key.
    fn root_key(prefix: &ObjectPath, name: &str) -> String {
        let p = prefix.as_ref().trim_matches('/');
        if p.is_empty() {
            name.to_string()
        } else {
            format!("{}/{}", p, name)
        }
    }
    if store_key_exists(store, &root_key(prefix, "zarr.json")).await {
        return Ok(ZarrVersion::V3);
    }
    // Check both v2 root markers, matching the sync `detect_zarr_version`
    // (previously only `.zgroup` was probed here).
    if store_key_exists(store, &root_key(prefix, ".zgroup")).await
        || store_key_exists(store, &root_key(prefix, ".zarray")).await
    {
        return Ok(ZarrVersion::V2);
    }
    // No root metadata: list the child prefixes and probe one level down.
    // The root of the store is the empty prefix, not "/".
    let store_prefix = if prefix.as_ref().is_empty() {
        StorePrefix::root()
    } else {
        let prefix_str = format!("{}/", prefix.as_ref().trim_end_matches('/'));
        StorePrefix::new(&prefix_str)
            .map_err(|e| format!("Invalid prefix '{}': {}", prefix_str, e))?
    };
    let entries = store
        .list_dir(&store_prefix)
        .await
        .map_err(|e| format!("Failed to list directory: {}", e))?;
    for subdir in entries.prefixes() {
        let subdir_str = subdir.as_str().trim_end_matches('/');
        if store_key_exists(store, &format!("{}/zarr.json", subdir_str)).await {
            return Ok(ZarrVersion::V3);
        }
        if store_key_exists(store, &format!("{}/.zarray", subdir_str)).await {
            return Ok(ZarrVersion::V2);
        }
    }
    Err("Could not detect Zarr version: no metadata files found".into())
}
/// Return `true` iff `key` is a valid store key that currently holds a value.
/// Invalid keys and read errors are both treated as "absent".
async fn store_key_exists(store: &AsyncReadableListableStorage, key: &str) -> bool {
    use zarrs::storage::{AsyncReadableStorageTraits, StoreKey};
    match StoreKey::new(key) {
        Ok(store_key) => matches!(store.get(&store_key).await, Ok(Some(_))),
        Err(_) => false,
    }
}
/// Fetch `key` from the store and decode it as UTF-8.
///
/// Every failure mode (invalid key, read error, missing key, invalid UTF-8)
/// is mapped to a descriptive error mentioning the key.
async fn store_get_string(
    store: &AsyncReadableListableStorage,
    key: &str,
) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
    use zarrs::storage::{AsyncReadableStorageTraits, StoreKey};
    let store_key = StoreKey::new(key).map_err(|e| format!("Invalid key '{}': {}", key, e))?;
    let maybe_bytes = store
        .get(&store_key)
        .await
        .map_err(|e| format!("Failed to read '{}': {}", key, e))?;
    let bytes = maybe_bytes.ok_or_else(|| format!("Key not found: {}", key))?;
    let text = String::from_utf8(bytes.to_vec())
        .map_err(|e| format!("Invalid UTF-8 in '{}': {}", key, e))?;
    Ok(text)
}
/// Async variant of [`discover_arrays_v2`]: list the child prefixes under
/// `prefix` and read each child's `.zarray` metadata document.
async fn discover_arrays_v2_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    use zarrs::storage::{AsyncListableStorageTraits, StorePrefix};
    let mut arrays: Vec<ZarrArrayMeta> = Vec::new();
    // Store prefixes are relative: the root of the store is the empty prefix,
    // not "/" (the previous `StorePrefix::new("/")` is not a valid prefix, so
    // listing a root-level store failed here).
    let store_prefix = if prefix.as_ref().is_empty() {
        StorePrefix::root()
    } else {
        let prefix_str = format!("{}/", prefix.as_ref().trim_end_matches('/'));
        StorePrefix::new(&prefix_str)
            .map_err(|e| format!("Invalid prefix '{}': {}", prefix_str, e))?
    };
    let entries = store
        .list_dir(&store_prefix)
        .await
        .map_err(|e| format!("Failed to list directory: {}", e))?;
    for subdir in entries.prefixes() {
        let subdir_str = subdir.as_str().trim_end_matches('/');
        let zarray_path = format!("{}/.zarray", subdir_str);
        // Children without a readable `.zarray` (e.g. nested groups) are skipped.
        if let Ok(content) = store_get_string(store, &zarray_path).await {
            let meta: serde_json::Value = serde_json::from_str(&content)?;
            // Last path segment is the array name (subdir_str is already
            // trimmed of its trailing '/').
            let name = subdir_str.rsplit('/').next().unwrap_or("unknown").to_string();
            let shape: Vec<u64> = meta
                .get("shape")
                .and_then(|v| v.as_array())
                .map(|arr| arr.iter().filter_map(|v| v.as_u64()).collect())
                .unwrap_or_default();
            // v2 dtypes are NumPy-style strings such as "<f8"; default float64.
            let dtype_raw = meta.get("dtype").and_then(|v| v.as_str()).unwrap_or("<f8");
            arrays.push(ZarrArrayMeta {
                name,
                data_type: parse_v2_dtype(dtype_raw),
                shape,
                coord_min_max: None,
            });
        }
    }
    separate_and_sort_arrays_async(store, prefix, arrays).await
}
/// Async variant of [`discover_arrays_v3`]: list the child prefixes under
/// `prefix` and read each child's `zarr.json`, keeping only nodes whose
/// `node_type` is `"array"`.
async fn discover_arrays_v3_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    use zarrs::storage::{AsyncListableStorageTraits, StorePrefix};
    let mut arrays: Vec<ZarrArrayMeta> = Vec::new();
    // Store prefixes are relative: the root of the store is the empty prefix,
    // not "/" (the previous `StorePrefix::new("/")` is not a valid prefix, so
    // listing a root-level store failed here).
    let store_prefix = if prefix.as_ref().is_empty() {
        StorePrefix::root()
    } else {
        let prefix_str = format!("{}/", prefix.as_ref().trim_end_matches('/'));
        StorePrefix::new(&prefix_str)
            .map_err(|e| format!("Invalid prefix '{}': {}", prefix_str, e))?
    };
    let entries = store
        .list_dir(&store_prefix)
        .await
        .map_err(|e| format!("Failed to list directory: {}", e))?;
    for subdir in entries.prefixes() {
        let subdir_str = subdir.as_str().trim_end_matches('/');
        let zarr_json_path = format!("{}/zarr.json", subdir_str);
        // Children without a readable `zarr.json` are skipped.
        if let Ok(content) = store_get_string(store, &zarr_json_path).await {
            let meta: serde_json::Value = serde_json::from_str(&content)?;
            // Group nodes carry no shape/data_type — skip them.
            if meta.get("node_type").and_then(|v| v.as_str()) != Some("array") {
                continue;
            }
            // Last path segment is the array name.
            let name = subdir_str.rsplit('/').next().unwrap_or("unknown").to_string();
            let shape: Vec<u64> = meta
                .get("shape")
                .and_then(|v| v.as_array())
                .map(|arr| arr.iter().filter_map(|v| v.as_u64()).collect())
                .unwrap_or_default();
            let data_type = meta
                .get("data_type")
                .and_then(|v| v.as_str())
                .unwrap_or("float64")
                .to_string();
            arrays.push(ZarrArrayMeta {
                name,
                data_type,
                shape,
                coord_min_max: None,
            });
        }
    }
    separate_and_sort_arrays_async(store, prefix, arrays).await
}
/// Async variant of [`separate_and_sort_arrays`]: split discovered arrays
/// into coordinates and data variables, order them, and compute coordinate
/// min/max ranges via async reads. Coordinates that cannot be opened or read
/// simply keep `coord_min_max = None` (best effort).
async fn separate_and_sort_arrays_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
    arrays: Vec<ZarrArrayMeta>,
) -> Result<ZarrStoreMeta, Box<dyn std::error::Error + Send + Sync>> {
    use zarrs::array::Array;
    use zarrs::array_subset::ArraySubset;
    // Floating-point range: IEEE min/max fold over the whole coordinate array.
    // `$arr`/`$subset` are passed explicitly because macro_rules! is hygienic
    // for local variables.
    macro_rules! float_range {
        ($arr:expr, $subset:expr, $t:ty) => {
            $arr.async_retrieve_array_subset_elements::<$t>($subset)
                .await
                .ok()
                .filter(|data| !data.is_empty())
                .map(|data| {
                    let min = data.iter().cloned().fold(<$t>::INFINITY, <$t>::min) as f64;
                    let max = data.iter().cloned().fold(<$t>::NEG_INFINITY, <$t>::max) as f64;
                    (min, max)
                })
        };
    }
    // Integer range via Iterator::min/max, widened to f64.
    macro_rules! int_range {
        ($arr:expr, $subset:expr, $t:ty) => {
            $arr.async_retrieve_array_subset_elements::<$t>($subset)
                .await
                .ok()
                .filter(|data| !data.is_empty())
                .and_then(|data| {
                    let min = *data.iter().min()? as f64;
                    let max = *data.iter().max()? as f64;
                    Some((min, max))
                })
        };
    }
    let (mut coords, mut data_vars): (Vec<_>, Vec<_>) =
        arrays.into_iter().partition(|a| a.is_coordinate());
    data_vars.sort_by(|a, b| a.name.cmp(&b.name));
    coords = infer_coord_order_from_data_vars(coords, &data_vars);
    for coord in &mut coords {
        // Zarr node paths are absolute with single '/' separators. An empty
        // prefix must yield "/<name>"; the previous format! produced
        // "//<name>" at the store root, which fails to open.
        let trimmed_prefix = prefix.as_ref().trim_matches('/');
        let coord_path = if trimmed_prefix.is_empty() {
            format!("/{}", coord.name)
        } else {
            format!("/{}/{}", trimmed_prefix, coord.name)
        };
        debug!(coord = %coord.name, path = %coord_path, "Attempting to compute min/max");
        let arr = match Array::async_open(store.clone(), &coord_path).await {
            Ok(arr) => arr,
            Err(e) => {
                debug!(coord = %coord.name, error = %e, "Failed to open coordinate array");
                continue;
            }
        };
        let subset = match ArraySubset::new_with_start_shape(vec![0], arr.shape().to_vec()) {
            Ok(s) => s,
            Err(_) => continue,
        };
        let min_max: Option<(f64, f64)> = match coord.data_type.as_str() {
            "float64" => float_range!(arr, &subset, f64),
            "float32" => float_range!(arr, &subset, f32),
            "int64" => int_range!(arr, &subset, i64),
            "int32" => int_range!(arr, &subset, i32),
            "int16" => int_range!(arr, &subset, i16),
            // Parity with the sync path, which also handles unsigned dtypes
            // (these were missing from the async version).
            "uint64" => int_range!(arr, &subset, u64),
            "uint32" => int_range!(arr, &subset, u32),
            _ => None,
        };
        if let Some((min, max)) = min_max {
            debug!(
                coord = %coord.name,
                min = min,
                max = max,
                "Computed coordinate min/max (async)"
            );
            coord.coord_min_max = Some((min, max));
        }
    }
    // Cartesian product of the coordinate lengths — every coordinate is 1-D
    // here, so shape[0] is always present.
    let total_rows: usize = coords.iter().map(|c| c.shape[0] as usize).product();
    Ok(ZarrStoreMeta {
        coords,
        data_vars,
        total_rows,
    })
}
/// Async variant of [`infer_schema`], discarding the discovery metadata.
#[instrument(level = "debug", skip_all)]
pub async fn infer_schema_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<Schema, Box<dyn std::error::Error + Send + Sync>> {
    infer_schema_with_meta_async(store, prefix)
        .await
        .map(|(schema, _meta)| schema)
}
/// Async variant of [`infer_schema_with_meta`].
///
/// Coordinates become non-nullable dictionary-encoded fields; data variables
/// become nullable fields with their native Arrow type. Field order is
/// coordinates first, then data variables.
#[instrument(level = "debug", skip_all)]
pub async fn infer_schema_with_meta_async(
    store: &AsyncReadableListableStorage,
    prefix: &ObjectPath,
) -> Result<(Schema, ZarrStoreMeta), Box<dyn std::error::Error + Send + Sync>> {
    debug!("Starting async schema inference");
    let meta = discover_arrays_async(store, prefix).await?;
    let fields: Vec<Field> = meta
        .coords
        .iter()
        .map(|c| Field::new(&c.name, zarr_dtype_to_arrow_dictionary(&c.data_type), false))
        .chain(
            meta.data_vars
                .iter()
                .map(|v| Field::new(&v.name, zarr_dtype_to_arrow(&v.data_type), true)),
        )
        .collect();
    info!(num_fields = fields.len(), "Schema inferred");
    Ok((Schema::new(fields), meta))
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::DataType;

    // Expected column order of the synthetic fixtures: coordinates first
    // (dimension order), then data variables (alphabetical).
    const EXPECTED_COLUMNS: [&str; 5] = ["time", "lon", "lat", "humidity", "temperature"];

    #[test]
    fn test_detect_zarr_version_v2() {
        for store in ["data/synthetic_v2.zarr", "data/synthetic_v2_blosc.zarr"] {
            assert_eq!(detect_zarr_version(store).unwrap(), ZarrVersion::V2);
        }
    }

    #[test]
    fn test_detect_zarr_version_v3() {
        for store in ["data/synthetic_v3.zarr", "data/synthetic_v3_blosc.zarr"] {
            assert_eq!(detect_zarr_version(store).unwrap(), ZarrVersion::V3);
        }
    }

    #[test]
    fn test_detect_zarr_version_error() {
        assert!(detect_zarr_version("data/nonexistent.zarr").is_err());
    }

    #[test]
    fn test_array_meta_is_coordinate() {
        let make = |shape: Vec<u64>, range: Option<(f64, f64)>| ZarrArrayMeta {
            name: "x".to_string(),
            data_type: "float64".to_string(),
            shape,
            coord_min_max: range,
        };
        // 1-D arrays are coordinates ...
        assert!(make(vec![10], Some((0.0, 90.0))).is_coordinate());
        // ... arrays of any other rank are data variables.
        assert!(!make(vec![10, 10], None).is_coordinate());
        assert!(!make(vec![7, 10, 10], None).is_coordinate());
    }

    #[test]
    fn test_discover_arrays_v2() {
        let meta = discover_arrays("data/synthetic_v2.zarr").unwrap();
        let coord_names: Vec<_> = meta.coords.iter().map(|c| c.name.as_str()).collect();
        assert_eq!(coord_names, vec!["time", "lon", "lat"]);
        let var_names: Vec<_> = meta.data_vars.iter().map(|v| v.name.as_str()).collect();
        assert_eq!(var_names, vec!["humidity", "temperature"]);
        // Fixture dimensions: time=7, lon=10, lat=10.
        let coord_shapes: Vec<_> = meta.coords.iter().map(|c| c.shape.clone()).collect();
        assert_eq!(coord_shapes, vec![vec![7], vec![10], vec![10]]);
        for var in &meta.data_vars {
            assert_eq!(var.shape, vec![7, 10, 10]);
        }
        // The synthetic fixture stores every array as int64.
        for arr in meta.coords.iter().chain(meta.data_vars.iter()) {
            assert_eq!(arr.data_type, "int64");
        }
    }

    #[test]
    fn test_discover_arrays_v3() {
        let meta = discover_arrays("data/synthetic_v3.zarr").unwrap();
        assert_eq!(meta.coords.len(), 3);
        assert_eq!(meta.data_vars.len(), 2);
        let coord_names: Vec<_> = meta.coords.iter().map(|c| c.name.as_str()).collect();
        assert_eq!(coord_names, vec!["time", "lon", "lat"]);
        let var_names: Vec<_> = meta.data_vars.iter().map(|v| v.name.as_str()).collect();
        assert_eq!(var_names, vec!["humidity", "temperature"]);
    }

    #[test]
    fn test_infer_schema_structure() {
        let schema = infer_schema("data/synthetic_v2.zarr").unwrap();
        let names: Vec<_> = schema.fields().iter().map(|f| f.name().as_str()).collect();
        assert_eq!(names, EXPECTED_COLUMNS);
    }

    #[test]
    fn test_infer_schema_coord_types() {
        let schema = infer_schema("data/synthetic_v2.zarr").unwrap();
        // The first three fields are the coordinates.
        for field in schema.fields().iter().take(3) {
            assert!(
                matches!(field.data_type(), DataType::Dictionary(_, _)),
                "Coordinate {} should be Dictionary type",
                field.name()
            );
            assert!(
                !field.is_nullable(),
                "Coordinate {} should not be nullable",
                field.name()
            );
        }
    }

    #[test]
    fn test_infer_schema_data_var_types() {
        let schema = infer_schema("data/synthetic_v2.zarr").unwrap();
        // Everything after the coordinates is a data variable.
        for field in schema.fields().iter().skip(3) {
            assert_eq!(
                field.data_type(),
                &DataType::Int64,
                "Data var {} should be Int64",
                field.name()
            );
            assert!(
                field.is_nullable(),
                "Data var {} should be nullable",
                field.name()
            );
        }
    }

    #[test]
    fn test_infer_schema_v2_v3_parity() {
        // v2 and v3 fixtures hold the same logical dataset, so the inferred
        // schemas must agree field-for-field.
        let schema_v2 = infer_schema("data/synthetic_v2.zarr").unwrap();
        let schema_v3 = infer_schema("data/synthetic_v3.zarr").unwrap();
        assert_eq!(schema_v2.fields().len(), schema_v3.fields().len());
        for (f2, f3) in schema_v2.fields().iter().zip(schema_v3.fields().iter()) {
            assert_eq!(f2.name(), f3.name(), "Field names should match");
            assert_eq!(
                f2.data_type(),
                f3.data_type(),
                "Data types should match for {}",
                f2.name()
            );
            assert_eq!(
                f2.is_nullable(),
                f3.is_nullable(),
                "Nullability should match for {}",
                f2.name()
            );
        }
    }
}