use tensogram::{DecodeOptions, Dtype, decode};
use tensogram_netcdf::{ConvertOptions, DataPipeline, NetcdfError, SplitBy, convert_netcdf_file};
/// Build the absolute path of a fixture file under `<crate root>/testdata/`.
fn testdata(name: &str) -> std::path::PathBuf {
    [env!("CARGO_MANIFEST_DIR"), "testdata", name]
        .iter()
        .collect()
}
/// A fully decoded tensogram message: the global metadata plus the decoded
/// objects (each carrying its header and payload bytes) contained in it.
type DecodedMsg = (
    tensogram::types::GlobalMetadata,
    Vec<tensogram::types::DecodedObject>,
);
/// Decode the first message in `msgs` with default options, panicking on
/// decode failure (and on an empty slice, as in any test misconfiguration).
fn decode_first(msgs: &[Vec<u8>]) -> DecodedMsg {
    let options = DecodeOptions::default();
    decode(&msgs[0], &options).expect("decode failed")
}
#[test]
fn simple_2d_produces_one_message() {
    // Default options use SplitBy::File, so the whole file becomes one message.
    let src = testdata("simple_2d.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    assert_eq!(
        messages.len(),
        1,
        "SplitBy::File should produce exactly 1 message"
    );
}
#[test]
fn simple_2d_has_one_object_with_correct_shape() {
    // The fixture holds a single 5x4 float64 variable named "data".
    let src = testdata("simple_2d.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, objects) = decode_first(&messages);
    assert_eq!(objects.len(), 1, "simple_2d.nc has 1 variable 'data'");
    assert_eq!(meta.base.len(), 1);
    let header = &objects[0].0;
    assert_eq!(header.shape, vec![5, 4]);
    assert_eq!(header.dtype, Dtype::Float64);
}
#[test]
fn simple_2d_data_roundtrip() {
    // 5 * 4 = 20 f64 elements at 8 bytes apiece.
    let src = testdata("simple_2d.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (_, objects) = decode_first(&messages);
    let payload = &objects[0].1;
    assert_eq!(payload.len(), 20 * 8);
}
#[test]
fn multi_dtype_preserves_native_dtypes() {
    // Every native NetCDF numeric type keeps its width and signedness.
    let src = testdata("multi_dtype.nc");
    let options = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, objects) = decode_first(&messages);
    // Map variable name -> decoded dtype, keyed via the "name" metadata entry.
    let mut dtype_map = std::collections::HashMap::new();
    for (i, obj) in objects.iter().enumerate() {
        if let Some(ciborium::Value::Text(name)) = meta.base[i].get("name") {
            dtype_map.insert(name.clone(), obj.0.dtype);
        }
    }
    let expected = [
        ("i8", Dtype::Int8),
        ("i16", Dtype::Int16),
        ("i32", Dtype::Int32),
        ("i64", Dtype::Int64),
        ("u8", Dtype::Uint8),
        ("u16", Dtype::Uint16),
        ("u32", Dtype::Uint32),
        ("u64", Dtype::Uint64),
        ("f32", Dtype::Float32),
        ("f64", Dtype::Float64),
    ];
    for (name, dtype) in expected {
        assert_eq!(dtype_map.get(name), Some(&dtype), "dtype mismatch for {name}");
    }
}
#[test]
fn cf_temperature_has_netcdf_metadata() {
    // Each variable entry should carry a "netcdf" map with its raw attributes.
    let src = testdata("cf_temperature.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, _) = decode_first(&messages);
    let temp_entry = meta
        .base
        .iter()
        .find(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "temperature"));
    assert!(
        temp_entry.is_some(),
        "temperature variable should be in base"
    );
    let entry = temp_entry.unwrap();
    let netcdf_meta = entry.get("netcdf").expect("should have netcdf metadata");
    assert!(
        matches!(netcdf_meta, ciborium::Value::Map(_)),
        "netcdf metadata should be a map"
    );
}
#[test]
fn cf_temperature_no_cf_key_without_flag() {
    // CF interpretation is opt-in; by default no "cf" key may appear.
    let src = testdata("cf_temperature.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, _) = decode_first(&messages);
    assert!(
        meta.base.iter().all(|entry| !entry.contains_key("cf")),
        "no 'cf' key without --cf flag"
    );
}
#[test]
fn cf_temperature_has_cf_key_with_flag() {
    // With cf=true, CF attributes are summarized under a "cf" key.
    let src = testdata("cf_temperature.nc");
    let options = ConvertOptions {
        cf: true,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, _) = decode_first(&messages);
    let temp_entry = meta
        .base
        .iter()
        .find(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "temperature"));
    assert!(temp_entry.is_some());
    let entry = temp_entry.unwrap();
    assert!(
        entry.contains_key("cf"),
        "temperature should have 'cf' key with --cf flag"
    );
}
#[test]
fn cf_temperature_unpacks_to_f64() {
    // scale_factor/add_offset packed variables are widened to f64 on convert.
    let src = testdata("cf_temperature.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (_, objects) = decode_first(&messages);
    assert!(
        objects.iter().any(|o| o.0.dtype == Dtype::Float64),
        "packed variable should be unpacked to f64"
    );
}
#[test]
fn multi_dtype_nan_values_in_f64_with_nan() {
    // NaN payloads must survive conversion when `allow_nan` is enabled.
    let path = testdata("multi_dtype.nc");
    let opts = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let msgs = convert_netcdf_file(&path, &opts).unwrap();
    let (meta, objects) = decode_first(&msgs);
    // Require the variable to exist: the previous `if let Some(idx)` form
    // silently passed when "f64_with_nan" was absent from the fixture.
    let idx = meta
        .base
        .iter()
        .position(|e| {
            matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "f64_with_nan")
        })
        .expect("f64_with_nan variable should be present");
    let data = &objects[idx].1;
    let floats: Vec<f64> = data
        .chunks_exact(8)
        .map(|b| f64::from_le_bytes(b.try_into().unwrap()))
        .collect();
    assert!(
        floats.iter().any(|v: &f64| v.is_nan()),
        "f64_with_nan should contain NaN values"
    );
}
#[test]
fn split_by_file_produces_one_message() {
    // SplitBy::File bundles all variables into a single message.
    let src = testdata("multi_var.nc");
    let options = ConvertOptions {
        split_by: SplitBy::File,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert_eq!(messages.len(), 1);
}
#[test]
fn split_by_variable_produces_one_message_per_variable() {
    // SplitBy::Variable emits one single-object message per numeric variable.
    let src = testdata("multi_var.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Variable,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert!(
        messages.len() >= 3,
        "should have at least 3 messages for 3 numeric vars"
    );
    for msg in &messages {
        let (_, objects) = decode(msg, &DecodeOptions::default()).unwrap();
        assert_eq!(
            objects.len(),
            1,
            "each message should have 1 object in variable mode"
        );
    }
}
#[test]
fn split_by_record_produces_one_message_per_record() {
    // SplitBy::Record slices along the unlimited dimension, one message each.
    let src = testdata("unlimited_time.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert_eq!(
        messages.len(),
        5,
        "should produce 5 messages for 5 time records"
    );
}
#[test]
fn split_by_record_errors_without_unlimited_dim() {
    // Record splitting needs an unlimited dimension to slice along.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let outcome = convert_netcdf_file(&src, &options);
    assert!(
        matches!(outcome, Err(NetcdfError::NoUnlimitedDimension { .. })),
        "should error when no unlimited dimension"
    );
}
#[test]
fn cf_flag_extracts_standard_name() {
    // The cf map should surface CF attributes such as standard_name.
    let src = testdata("cf_temperature.nc");
    let options = ConvertOptions {
        cf: true,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, _) = decode_first(&messages);
    let temp_entry = meta
        .base
        .iter()
        .find(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "temperature"));
    assert!(temp_entry.is_some());
    let cf_val = temp_entry.unwrap().get("cf").expect("should have cf key");
    match cf_val {
        ciborium::Value::Map(cf_map) => {
            let has_standard_name = cf_map
                .iter()
                .any(|(k, _)| matches!(k, ciborium::Value::Text(s) if s == "standard_name"));
            assert!(has_standard_name, "cf map should contain standard_name");
        }
        _ => panic!("cf value should be a map"),
    }
}
#[test]
fn empty_file_returns_no_variables_error() {
    // A file with no variables at all is a hard error, not an empty output.
    let src = testdata("empty_file.nc");
    let outcome = convert_netcdf_file(&src, &ConvertOptions::default());
    assert!(
        matches!(outcome, Err(NetcdfError::NoVariables)),
        "empty file should return NoVariables error, got: {:?}",
        outcome
    );
}
#[test]
fn nc3_classic_converts_successfully() {
    // Classic (NetCDF-3) files are supported alongside NetCDF-4.
    let src = testdata("nc3_classic.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    assert_eq!(
        messages.len(),
        1,
        "NC3 classic file should convert to 1 message"
    );
    let (_, objects) = decode_first(&messages);
    assert!(!objects.is_empty(), "should have at least 1 object");
}
#[test]
fn nc4_groups_warns_and_converts_root_only() {
    // Sub-group variables are ignored; only root-group variables convert.
    let src = testdata("nc4_groups.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    assert_eq!(messages.len(), 1);
    let (meta, _) = decode_first(&messages);
    // Local lookup shared by both assertions below.
    let has_var = |wanted: &str| {
        meta.base
            .iter()
            .any(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == wanted))
    };
    assert!(has_var("root_var"), "root_var should be present");
    assert!(
        !has_var("predicted"),
        "predicted (sub-group var) should NOT be present"
    );
}
#[test]
fn multi_var_skips_char_variables() {
    // Char-typed variables carry no numeric payload and are skipped.
    let src = testdata("multi_var.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, _) = decode_first(&messages);
    let has_description = meta
        .base
        .iter()
        .any(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "description"));
    assert!(
        !has_description,
        "char variable 'description' should be skipped"
    );
}
#[test]
fn multi_var_has_all_numeric_variables() {
    // All three numeric variables must survive the conversion.
    let src = testdata("multi_var.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, _) = decode_first(&messages);
    let names: Vec<&str> = meta
        .base
        .iter()
        .filter_map(|e| match e.get("name") {
            Some(ciborium::Value::Text(s)) => Some(s.as_str()),
            _ => None,
        })
        .collect();
    assert!(
        names.contains(&"temperature"),
        "temperature should be present"
    );
    assert!(names.contains(&"humidity"), "humidity should be present");
    assert!(names.contains(&"pressure"), "pressure should be present");
}
#[test]
fn scalar_variable_has_empty_shape() {
    // A 0-dimensional (scalar) variable must decode with shape [] and ndim 0.
    let path = testdata("multi_dtype.nc");
    let opts = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let msgs = convert_netcdf_file(&path, &opts).unwrap();
    let (meta, objects) = decode_first(&msgs);
    // Require the variable to exist: the previous `if let Some(idx)` form
    // silently passed (asserting nothing) when "pi" was missing.
    let idx = meta
        .base
        .iter()
        .position(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "pi"))
        .expect("scalar variable 'pi' should be present");
    assert_eq!(
        objects[idx].0.shape,
        vec![],
        "scalar should have empty shape"
    );
    assert_eq!(objects[idx].0.ndim, 0, "scalar should have ndim=0");
}
#[test]
fn unlimited_time_static_var_included_in_record_split() {
    // Variables not tied to the unlimited dimension repeat in every record.
    let src = testdata("unlimited_time.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert!(!messages.is_empty());
    let (meta, _) = decode(&messages[0], &DecodeOptions::default()).unwrap();
    let has_mask = meta
        .base
        .iter()
        .any(|e| matches!(e.get("name"), Some(ciborium::Value::Text(s)) if s == "mask"));
    assert!(
        has_mask,
        "static variable 'mask' should appear in each record message"
    );
}
#[test]
fn default_pipeline_stays_none() {
    // With no pipeline configured, every stage is "none" and params stay empty.
    let src = testdata("simple_2d.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.encoding, "none");
    assert_eq!(header.filter, "none");
    assert_eq!(header.compression, "none");
    assert!(
        header.params.is_empty(),
        "default pipeline must not insert any params"
    );
}
#[test]
fn simple_2d_with_simple_packing_24bit() {
    // simple_packing must record its packing parameters in the object params.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            encoding: "simple_packing".to_string(),
            bits: Some(24),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.encoding, "simple_packing");
    let bpv = header
        .params
        .get("bits_per_value")
        .expect("bits_per_value param");
    match bpv {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 24, "bits_per_value should be 24");
        }
        _ => panic!("bits_per_value should be an integer"),
    }
    for key in ["reference_value", "binary_scale_factor", "decimal_scale_factor"] {
        assert!(header.params.contains_key(key));
    }
}
#[test]
fn simple_2d_with_shuffle_plus_zstd() {
    // shuffle + zstd should record element size and the default zstd level.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            filter: "shuffle".to_string(),
            compression: "zstd".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.filter, "shuffle");
    assert_eq!(header.compression, "zstd");
    let element_size = header
        .params
        .get("shuffle_element_size")
        .expect("shuffle_element_size param");
    match element_size {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 8);
        }
        _ => panic!("shuffle_element_size should be an integer"),
    }
    let level = header.params.get("zstd_level").expect("zstd_level param");
    match level {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 3);
        }
        _ => panic!("zstd_level should be an integer"),
    }
}
#[test]
fn simple_2d_with_zstd_custom_level() {
    // An explicit compression_level must override the default zstd level.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            compression: "zstd".to_string(),
            compression_level: Some(7),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let level = objects[0]
        .0
        .params
        .get("zstd_level")
        .expect("zstd_level param");
    match level {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 7);
        }
        _ => panic!("zstd_level should be an integer"),
    }
}
#[test]
fn simple_2d_with_lz4_compression() {
    // lz4 is accepted as a compression stage.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            compression: "lz4".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    assert_eq!(objects[0].0.compression, "lz4");
}
#[test]
fn simple_2d_with_szip_compression() {
    // szip after simple_packing must record its rsi/block/flags parameters.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            encoding: "simple_packing".to_string(),
            bits: Some(16),
            compression: "szip".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.compression, "szip");
    for key in ["szip_rsi", "szip_block_size", "szip_flags"] {
        assert!(header.params.contains_key(key));
    }
}
#[test]
fn simple_2d_with_shuffle_szip_combo() {
    // shuffle filtering composes with szip compression.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            filter: "shuffle".to_string(),
            compression: "szip".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.filter, "shuffle");
    assert_eq!(header.compression, "szip");
}
#[test]
fn simple_2d_with_blosc2_compression() {
    // blosc2 must record the requested compression level as blosc2_clevel.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            compression: "blosc2".to_string(),
            compression_level: Some(9),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    assert_eq!(objects[0].0.compression, "blosc2");
    let clevel = objects[0]
        .0
        .params
        .get("blosc2_clevel")
        .expect("blosc2_clevel param");
    match clevel {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 9);
        }
        _ => panic!("blosc2_clevel should be an integer"),
    }
}
#[test]
fn unknown_compression_errors() {
    // Unrecognized compression names must fail and name the offender.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            compression: "bogus".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let outcome = convert_netcdf_file(&src, &options);
    assert!(outcome.is_err(), "unknown compression should error");
    let msg = format!("{}", outcome.unwrap_err());
    assert!(
        msg.contains("bogus"),
        "error should mention 'bogus', got: {msg}"
    );
}
#[test]
fn unknown_encoding_errors() {
    // Unrecognized encoding names must fail and name the offender.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            encoding: "magic_packing".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let outcome = convert_netcdf_file(&src, &options);
    assert!(outcome.is_err(), "unknown encoding should error");
    let err = outcome.unwrap_err();
    assert!(format!("{err}").contains("magic_packing"));
}
#[test]
fn unknown_filter_errors() {
    // Unrecognized filter names must fail and name the offender.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            filter: "wibble".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let outcome = convert_netcdf_file(&src, &options);
    assert!(outcome.is_err(), "unknown filter should error");
    let err = outcome.unwrap_err();
    assert!(format!("{err}").contains("wibble"));
}
#[test]
fn simple_packing_on_multi_dtype_fails_on_nan_variable() {
    // simple_packing cannot represent NaN, so conversion must fail with a
    // diagnostic naming the encoding, the variable, and the trigger kind.
    let src = testdata("multi_dtype.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            encoding: "simple_packing".to_string(),
            bits: Some(16),
            ..Default::default()
        },
        ..Default::default()
    };
    let err = convert_netcdf_file(&src, &options).unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("simple_packing") && msg.contains("f64_with_nan"),
        "error must name encoding + offending variable: {msg}"
    );
    assert!(
        msg.contains("NaN"),
        "error must name the trigger kind: {msg}"
    );
}
#[test]
fn pipeline_round_trip_zstd_decodes_back_to_input() {
    // Compressing with zstd then decoding must reproduce the baseline bytes.
    let src = testdata("simple_2d.nc");
    let baseline_msgs = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (_, baseline_objects) = decode_first(&baseline_msgs);
    let baseline_bytes = baseline_objects[0].1.clone();
    let zstd_options = ConvertOptions {
        pipeline: DataPipeline {
            compression: "zstd".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let zstd_msgs = convert_netcdf_file(&src, &zstd_options).unwrap();
    let (_, zstd_objects) = decode_first(&zstd_msgs);
    assert_eq!(
        zstd_objects[0].1, baseline_bytes,
        "zstd round-trip should recover the original payload bytes exactly"
    );
}
#[test]
fn simple_packing_plus_shuffle_uses_post_encoding_element_size() {
    // After packing to 16 bits, shuffle must use the packed element width.
    let src = testdata("simple_2d.nc");
    let options = ConvertOptions {
        pipeline: DataPipeline {
            encoding: "simple_packing".to_string(),
            bits: Some(16),
            filter: "shuffle".to_string(),
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (_, objects) = decode_first(&messages);
    let header = &objects[0].0;
    assert_eq!(header.encoding, "simple_packing");
    assert_eq!(header.filter, "shuffle");
    let element_size = header
        .params
        .get("shuffle_element_size")
        .expect("shuffle_element_size");
    match element_size {
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            assert_eq!(n, 2, "element_size should be ⌈16/8⌉ = 2 post-pack");
        }
        _ => panic!("shuffle_element_size should be integer"),
    }
}
#[test]
fn record_multi_dtype_covers_all_read_native_extents_arms() {
    // One record per timestep; every native dtype must round-trip with its
    // original width in record-split mode.
    let path = testdata("record_multi_dtype.nc");
    let opts = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let msgs = convert_netcdf_file(&path, &opts).unwrap();
    assert_eq!(msgs.len(), 3, "record_multi_dtype has 3 timesteps");
    let (meta, objects) = decode_first(&msgs);
    // Pair each metadata entry with its object directly. The previous version
    // collected names with `filter_map` and then indexed by position, which
    // would mis-align names and objects if any entry lacked a "name" key.
    let mut by_name = std::collections::HashMap::new();
    for (entry, obj) in meta.base.iter().zip(objects.iter()) {
        if let Some(ciborium::Value::Text(name)) = entry.get("name") {
            by_name.insert(name.clone(), obj.0.dtype);
        }
    }
    assert_eq!(by_name.get("v_i8"), Some(&Dtype::Int8));
    assert_eq!(by_name.get("v_u8"), Some(&Dtype::Uint8));
    assert_eq!(by_name.get("v_i16"), Some(&Dtype::Int16));
    assert_eq!(by_name.get("v_u16"), Some(&Dtype::Uint16));
    assert_eq!(by_name.get("v_i32"), Some(&Dtype::Int32));
    assert_eq!(by_name.get("v_u32"), Some(&Dtype::Uint32));
    assert_eq!(by_name.get("v_i64"), Some(&Dtype::Int64));
    assert_eq!(by_name.get("v_u64"), Some(&Dtype::Uint64));
    assert_eq!(by_name.get("v_f32"), Some(&Dtype::Float32));
    assert_eq!(by_name.get("v_f64"), Some(&Dtype::Float64));
}
#[test]
fn attr_type_variants_all_unpack_to_f64() {
    // Regardless of the attribute's stored type, packed vars unpack to f64.
    let src = testdata("attr_type_variants.nc");
    let options = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, objects) = decode_first(&messages);
    let mut by_name = std::collections::HashMap::new();
    for (i, obj) in objects.iter().enumerate() {
        if let Some(ciborium::Value::Text(name)) = meta.base[i].get("name") {
            by_name.insert(name.clone(), obj.0.dtype);
        }
    }
    for var in [
        "scaled_float",
        "scaled_int",
        "scaled_short",
        "scaled_longlong",
        "with_missing",
    ] {
        assert_eq!(by_name.get(var), Some(&Dtype::Float64), "{var}");
    }
}
#[test]
fn attr_type_variants_string_scale_factor_returns_raw_data() {
    // A non-numeric scale_factor is ignored; raw values come through intact.
    let src = testdata("attr_type_variants.nc");
    let options = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, objects) = decode_first(&messages);
    let wanted = ciborium::Value::Text("string_scale".to_string());
    let idx = meta
        .base
        .iter()
        .position(|e| e.get("name") == Some(&wanted))
        .expect("string_scale variable");
    let floats: Vec<f64> = objects[idx]
        .1
        .chunks_exact(8)
        .map(|b| f64::from_le_bytes(b.try_into().unwrap()))
        .collect();
    assert_eq!(floats, vec![1.0, 2.0, 3.0, 4.0]);
}
#[test]
fn empty_unlimited_record_split_returns_no_messages() {
    // An unlimited dimension with zero records yields no record messages.
    let src = testdata("empty_unlimited.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert!(
        messages.is_empty(),
        "zero-record file should produce zero messages"
    );
}
#[test]
fn complex_types_unlimited_skipped_in_record_split() {
    // Enum-typed variables cannot be converted and are dropped per record.
    let src = testdata("complex_types_unlimited.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert_eq!(messages.len(), 2);
    for msg in &messages {
        let (meta, _) = decode(msg, &DecodeOptions::default()).unwrap();
        let names: Vec<String> = meta
            .base
            .iter()
            .filter_map(|e| match e.get("name") {
                Some(ciborium::Value::Text(s)) => Some(s.clone()),
                _ => None,
            })
            .collect();
        assert!(names.contains(&"value".to_string()));
        assert!(
            !names.contains(&"state".to_string()),
            "enum variable 'state' should be skipped in record split"
        );
    }
}
#[test]
fn complex_types_are_skipped_with_warning() {
    // Enum-typed variables are dropped; only the plain numeric var remains.
    let src = testdata("complex_types.nc");
    let messages = convert_netcdf_file(&src, &ConvertOptions::default()).unwrap();
    let (meta, objects) = decode_first(&messages);
    let names: Vec<String> = meta
        .base
        .iter()
        .filter_map(|e| match e.get("name") {
            Some(ciborium::Value::Text(s)) => Some(s.clone()),
            _ => None,
        })
        .collect();
    assert!(names.contains(&"value".to_string()));
    assert!(
        !names.contains(&"status".to_string()),
        "enum variable 'status' should be skipped"
    );
    assert_eq!(objects.len(), 1);
}
#[test]
fn record_with_char_skips_char_variable() {
    // Char variables are dropped from every record message.
    let src = testdata("record_with_char.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert_eq!(messages.len(), 2);
    for msg in &messages {
        let (meta, _) = decode(msg, &DecodeOptions::default()).unwrap();
        let names: Vec<String> = meta
            .base
            .iter()
            .filter_map(|e| match e.get("name") {
                Some(ciborium::Value::Text(s)) => Some(s.clone()),
                _ => None,
            })
            .collect();
        assert!(names.contains(&"values".to_string()));
        assert!(
            !names.contains(&"labels".to_string()),
            "char variable 'labels' should be skipped in record split"
        );
    }
}
#[test]
fn attr_type_variants_missing_value_replaced_with_nan() {
    // Sentinel (missing-value) entries become NaN; real values pass through.
    let src = testdata("attr_type_variants.nc");
    let options = ConvertOptions {
        encode_options: tensogram::EncodeOptions {
            allow_nan: true,
            ..Default::default()
        },
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    let (meta, objects) = decode_first(&messages);
    let wanted = ciborium::Value::Text("with_missing".to_string());
    let idx = meta
        .base
        .iter()
        .position(|e| e.get("name") == Some(&wanted))
        .expect("with_missing variable");
    let floats: Vec<f64> = objects[idx]
        .1
        .chunks_exact(8)
        .map(|b| f64::from_le_bytes(b.try_into().unwrap()))
        .collect();
    let nan_count = floats.iter().filter(|v| v.is_nan()).count();
    assert_eq!(
        nan_count, 2,
        "with_missing has 2 sentinel values → 2 NaNs, got {nan_count} in {floats:?}"
    );
    assert!(floats.contains(&5.0));
    assert!(floats.contains(&10.0));
}
#[test]
fn record_multi_dtype_records_are_independent() {
    // Every record message must carry the same set of variables.
    let src = testdata("record_multi_dtype.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let messages = convert_netcdf_file(&src, &options).unwrap();
    assert_eq!(messages.len(), 3);
    let mut last_names: Option<Vec<String>> = None;
    for msg in &messages {
        let (meta, _) = decode(msg, &DecodeOptions::default()).unwrap();
        let mut names: Vec<String> = meta
            .base
            .iter()
            .filter_map(|e| match e.get("name") {
                Some(ciborium::Value::Text(s)) => Some(s.clone()),
                _ => None,
            })
            .collect();
        names.sort();
        if let Some(prev) = &last_names {
            assert_eq!(&names, prev, "each record should have same var set");
        } else {
            last_names = Some(names);
        }
    }
}