#![cfg(feature = "zarr")]
use somatize_core::cache::CacheKey;
use somatize_core::store::{DataRef, DataStore, ZarrStore};
use somatize_core::value::Value;
/// Builds the `ZarrStore` used by every test in this file.
///
/// The store is created through `ZarrStore::from_env`, passing the
/// `"soma-zarr-test/"` string, a `soma-zarr-integration` directory under the
/// system temp dir, and the literal `4`. Any construction failure is collapsed
/// to `None`, which the callers treat as "skip this test".
///
/// NOTE(review): the `4` looks like a rows-per-chunk setting (the chunk-oriented
/// tests appear to rely on 4-row chunks) — confirm against `ZarrStore::from_env`.
/// NOTE(review): every test shares this directory and several delete it
/// mid-test; with the default parallel test runner that may race — verify.
fn store() -> Option<ZarrStore> {
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    match ZarrStore::from_env("soma-zarr-test/", local_dir, 4) {
        Ok(zarr) => Some(zarr),
        Err(_) => None,
    }
}
/// Stores a `[6, 3]` tensor, recreates the local `soma-zarr-integration`
/// directory, then checks that the full value and its metadata read back
/// unchanged before removing the stored entry.
///
/// Returns early (the test passes) when `store()` yields `None`.
#[test]
fn roundtrip_tensor_2d() {
    let Some(store) = store() else {
        eprintln!("Skipping: env vars not set");
        return;
    };

    let key = CacheKey::hash_data(b"zarr_test_2d");
    let data: Vec<f64> = (0..18).map(|i| i as f64).collect();
    // `data` is never used again, so it is moved into the tensor instead of cloned.
    let value = Value::tensor(data, vec![6, 3]);

    let data_ref = store.put(&key, &value).unwrap();
    assert!(matches!(data_ref, DataRef::Zarr { .. }));
    println!("PUT ok: {data_ref:?}");

    // Recreate the directory handed to `ZarrStore::from_env` as an empty one —
    // presumably so the read below cannot be served from locally cached files.
    // Both steps are best-effort: failures are ignored.
    let cache_dir = std::env::temp_dir().join("soma-zarr-integration");
    let _ = std::fs::remove_dir_all(&cache_dir);
    std::fs::create_dir_all(&cache_dir).ok();

    let retrieved = store.get(&data_ref).unwrap();
    assert_eq!(retrieved, value);
    println!("Full GET ok");

    // Metadata must report 6 rows, a trailing shape of [3] and the "tensor" dtype.
    let meta = store.meta(&data_ref).unwrap();
    assert_eq!(meta.total_rows, 6);
    assert_eq!(meta.shape_tail, vec![3]);
    assert_eq!(meta.dtype, "tensor");
    println!("META ok: {meta:?}");

    // Not reached if an assertion above panics, so the entry stays behind in that case.
    store.remove(&data_ref).unwrap();
    println!("REMOVE ok");
}
/// Reads two rows starting at row 1 from an `[8, 2]` tensor and checks that
/// exactly rows 1 and 2 come back.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn partial_read_within_chunk() {
    let zarr = match store() {
        Some(zarr) => zarr,
        None => return,
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_partial");
    let cells: Vec<f64> = (0..16_i32).map(f64::from).collect();
    let tensor = Value::tensor(cells, vec![8, 2]);
    let stored = zarr.put(&cache_key, &tensor).unwrap();

    // Best-effort reset of the local directory handed to the store —
    // presumably to force the read below to go past any local cache.
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    std::fs::remove_dir_all(&local_dir).ok();
    let _ = std::fs::create_dir_all(&local_dir);

    // Start at row 1, take 2 rows.
    let window = zarr.get_rows(&stored, 1, 2).unwrap();
    let (values, dims) = window.as_tensor().unwrap();
    assert_eq!(dims, &[2, 2]);
    assert_eq!(values, &[2.0, 3.0, 4.0, 5.0]);
    println!("Partial read within chunk ok");

    zarr.remove(&stored).unwrap();
}
/// Reads three rows starting at row 3 from an `[8, 2]` tensor and checks that
/// rows 3, 4 and 5 come back in order.
///
/// NOTE(review): the test name implies this range straddles a chunk boundary,
/// which holds if the store uses 4-row chunks — confirm.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn partial_read_across_chunks() {
    let zarr = match store() {
        Some(zarr) => zarr,
        None => return,
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_cross_chunk");
    let cells: Vec<f64> = (0..16_i32).map(f64::from).collect();
    let tensor = Value::tensor(cells, vec![8, 2]);
    let stored = zarr.put(&cache_key, &tensor).unwrap();

    // Best-effort reset of the local directory handed to the store.
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    std::fs::remove_dir_all(&local_dir).ok();
    let _ = std::fs::create_dir_all(&local_dir);

    // Start at row 3, take 3 rows.
    let window = zarr.get_rows(&stored, 3, 3).unwrap();
    let (values, dims) = window.as_tensor().unwrap();
    assert_eq!(dims, &[3, 2]);
    assert_eq!(values, &[6.0, 7.0, 8.0, 9.0, 10.0, 11.0]);
    println!("Cross-chunk partial read ok");

    zarr.remove(&stored).unwrap();
}
/// Reads a 12-element 1-D tensor back in four consecutive batches of
/// 3, 1, 7 and 1 rows and checks the contents of each batch.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn variable_batch_reads() {
    let Some(zarr) = store() else {
        return;
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_variable");
    let cells: Vec<f64> = (0..12_i32).map(f64::from).collect();
    let tensor = Value::tensor(cells, vec![12]);
    let stored = zarr.put(&cache_key, &tensor).unwrap();

    // Best-effort reset of the local directory handed to the store.
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    std::fs::remove_dir_all(&local_dir).ok();
    let _ = std::fs::create_dir_all(&local_dir);

    // Rows 0..3
    let first = zarr.get_rows(&stored, 0, 3).unwrap();
    let (values, dims) = first.as_tensor().unwrap();
    assert_eq!(dims, &[3]);
    assert_eq!(values, &[0.0, 1.0, 2.0]);

    // Row 3 only
    let second = zarr.get_rows(&stored, 3, 1).unwrap();
    let (values, _) = second.as_tensor().unwrap();
    assert_eq!(values, &[3.0]);

    // Rows 4..11
    let third = zarr.get_rows(&stored, 4, 7).unwrap();
    let (values, dims) = third.as_tensor().unwrap();
    assert_eq!(dims, &[7]);
    assert_eq!(values, &[4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]);

    // Last row
    let fourth = zarr.get_rows(&stored, 11, 1).unwrap();
    let (values, _) = fourth.as_tensor().unwrap();
    assert_eq!(values, &[11.0]);

    println!("Variable batch reads ok: 3 + 1 + 7 + 1 rows");
    zarr.remove(&stored).unwrap();
}
/// Stores a JSON value and checks that `put` hands back a `DataRef::S3`
/// reference and that `get` returns an equal value.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn json_fallback() {
    let zarr = match store() {
        Some(zarr) => zarr,
        None => return,
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_json");
    let payload = serde_json::json!({"model": "svm", "accuracy": 0.95});
    let document = Value::Json(payload);

    let stored = zarr.put(&cache_key, &document).unwrap();
    assert!(matches!(stored, DataRef::S3 { .. }));

    let fetched = zarr.get(&stored).unwrap();
    assert_eq!(fetched, document);
    println!("JSON fallback ok");

    zarr.remove(&stored).unwrap();
}
/// Reads the same two-row window of a `[4, 2]` tensor twice and checks that
/// both reads return equal values.
///
/// NOTE(review): this only compares the two results; nothing here observes
/// whether the second read was actually served from a local cache.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn local_cache_prevents_refetch() {
    let Some(zarr) = store() else {
        return;
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_cache_hit");
    let cells: Vec<f64> = (0..8_i32).map(f64::from).collect();
    let tensor = Value::tensor(cells, vec![4, 2]);
    let stored = zarr.put(&cache_key, &tensor).unwrap();

    let first_read = zarr.get_rows(&stored, 0, 2).unwrap();
    let second_read = zarr.get_rows(&stored, 0, 2).unwrap();
    assert_eq!(first_read, second_read);
    println!("Local cache hit ok");

    zarr.remove(&stored).unwrap();
}
/// Starts from a `[3, 2]` tensor, appends a 1-row and then a 3-row batch, and
/// checks the row count after each append plus the full and partial reads of
/// the resulting 7 rows.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn append_variable_batches() {
    let zarr = match store() {
        Some(zarr) => zarr,
        None => return,
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_append");
    let seed = Value::tensor(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], vec![3, 2]);
    let stored = zarr.put(&cache_key, &seed).unwrap();

    // First append: a single row -> 4 rows in total.
    let single_row = Value::tensor(vec![7.0, 8.0], vec![1, 2]);
    zarr.append(&stored, &single_row).unwrap();
    let info = zarr.meta(&stored).unwrap();
    assert_eq!(info.total_rows, 4);

    // Second append: three rows -> 7 rows in total.
    let three_rows = Value::tensor(vec![9.0, 10.0, 11.0, 12.0, 13.0, 14.0], vec![3, 2]);
    zarr.append(&stored, &three_rows).unwrap();
    let info = zarr.meta(&stored).unwrap();
    assert_eq!(info.total_rows, 7);

    // Best-effort reset of the local directory handed to the store.
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    std::fs::remove_dir_all(&local_dir).ok();
    let _ = std::fs::create_dir_all(&local_dir);

    // Full read must contain the seed rows followed by both appended batches.
    let everything = zarr.get(&stored).unwrap();
    let (values, dims) = everything.as_tensor().unwrap();
    assert_eq!(dims, &[7, 2]);
    assert_eq!(
        values,
        &[
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0
        ]
    );

    // Rows 2..6 span the seed data and both appended batches.
    let middle = zarr.get_rows(&stored, 2, 4).unwrap();
    let (values, dims) = middle.as_tensor().unwrap();
    assert_eq!(dims, &[4, 2]);
    assert_eq!(values, &[5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]);

    println!("Append variable batches ok: 3 + 1 + 3 = 7 rows");
    zarr.remove(&stored).unwrap();
}
/// Starts from a 2-element 1-D tensor, appends 10 more elements in one call,
/// and checks that the store then reports 12 rows holding 1.0 through 12.0.
///
/// Returns silently when `store()` yields `None`.
#[test]
fn append_creates_multiple_new_chunks() {
    let Some(zarr) = store() else {
        return;
    };

    let cache_key = CacheKey::hash_data(b"zarr_test_append_big");
    let seed = Value::tensor(vec![1.0, 2.0], vec![2]);
    let stored = zarr.put(&cache_key, &seed).unwrap();

    // One large append: values 3.0..=12.0.
    let tail: Vec<f64> = (3..13_i32).map(f64::from).collect();
    let tail_tensor = Value::tensor(tail, vec![10]);
    zarr.append(&stored, &tail_tensor).unwrap();

    let info = zarr.meta(&stored).unwrap();
    assert_eq!(info.total_rows, 12);

    // Best-effort reset of the local directory handed to the store.
    let local_dir = std::env::temp_dir().join("soma-zarr-integration");
    std::fs::remove_dir_all(&local_dir).ok();
    let _ = std::fs::create_dir_all(&local_dir);

    let everything = zarr.get(&stored).unwrap();
    let (values, _) = everything.as_tensor().unwrap();
    let wanted: Vec<f64> = (1..13_i32).map(f64::from).collect();
    assert_eq!(values, &wanted);

    println!("Append big batch ok: 2 + 10 = 12 rows across 3 chunks");
    zarr.remove(&stored).unwrap();
}