use ndarray::{Array, Dim, IxDyn};
use netcdf::{self, Attribute, Variable as NetCDFVariable};
use std::collections::HashMap;
use std::path::Path;
use tracing::{debug, info, warn};
use crate::config::Config;
use crate::error::{Result, RossbyError};
use crate::state::{AppState, AttributeValue, Dimension, Metadata, Variable};
/// Outcome of reading a NetCDF file: the extracted [`Metadata`] together with a map
/// from variable name to that variable's values as a dynamic-dimensional `f32` array.
pub type LoadResult = Result<(Metadata, HashMap<String, Array<f32, IxDyn>>)>;
/// Loads the NetCDF file at `path` and wraps its contents in an [`AppState`].
///
/// The file is read with `load_netcdf_file`, the result is checked with
/// `validate_netcdf_data`, and only then is the state built from `config`, the
/// metadata and the data arrays.
///
/// # Errors
///
/// Propagates any error produced while reading the file or validating its contents.
pub fn load_netcdf(path: &Path, config: Config) -> Result<AppState> {
    load_netcdf_file(path).and_then(|(metadata, data)| {
        validate_netcdf_data(&metadata, &data)?;
        Ok(AppState::new(config, metadata, data))
    })
}
/// Opens the NetCDF file at `path` and returns its metadata and variable data.
///
/// # Errors
///
/// * `RossbyError::Io` with kind `NotFound` when `path` does not exist.
/// * `RossbyError::NetCdf` when the file cannot be opened.
/// * Any error raised while extracting metadata or data.
fn load_netcdf_file(path: &Path) -> LoadResult {
    // Check existence up front so a missing file is reported as an I/O error
    // rather than as a NetCDF open failure.
    if !path.exists() {
        let not_found = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("File not found: {}", path.display()),
        );
        return Err(RossbyError::Io(not_found));
    }

    let file = netcdf::open(path).map_err(|e| RossbyError::NetCdf {
        message: format!("Failed to open NetCDF file: {}", e),
    })?;
    info!("Opened NetCDF file: {}", path.display());

    let n_variables = file.variables().count();
    let n_dimensions = file.dimensions().count();
    debug!("File has {} variables", n_variables);
    debug!("File has {} dimensions", n_dimensions);

    // Data extraction relies on the shapes recorded in the metadata.
    let metadata = extract_metadata(&file)?;
    let data = extract_data(&file, &metadata)?;
    Ok((metadata, data))
}
/// Collects global attributes, dimensions, variable descriptions and coordinate values
/// from an open NetCDF file into a [`Metadata`] value.
///
/// Variables whose type is rejected by `is_supported_variable` are skipped with a
/// warning. A variable is treated as a coordinate variable only when it is
/// one-dimensional and spans the dimension that carries its own name; its values are
/// then read with `extract_coordinate_values`. Every dimension that ends up without
/// coordinate values receives the index sequence `0, 1, 2, ...` (as `f64`) and a warning
/// is logged.
///
/// # Errors
///
/// Returns an error if an attribute cannot be converted or if coordinate values cannot
/// be read.
fn extract_metadata(file: &netcdf::File) -> Result<Metadata> {
    let mut global_attributes = HashMap::new();
    for attr in file.attributes() {
        let value = convert_attribute(&attr)?;
        global_attributes.insert(attr.name().to_string(), value);
    }

    let mut dimensions = HashMap::new();
    for dim in file.dimensions() {
        dimensions.insert(
            dim.name().to_string(),
            Dimension {
                name: dim.name().to_string(),
                size: dim.len(),
                is_unlimited: dim.is_unlimited(),
            },
        );
    }

    let mut variables = HashMap::new();
    let mut coordinates = HashMap::new();
    for var in file.variables() {
        if !is_supported_variable(&var) {
            warn!("Skipping unsupported variable: {}", var.name());
            continue;
        }

        let var_name = var.name().to_string();
        let var_dims: Vec<String> = var
            .dimensions()
            .iter()
            .map(|dim| dim.name().to_string())
            .collect();
        // Take the lengths from the variable's own dimension handles: looking them up
        // again by name on the file and unwrapping could panic.
        let var_shape: Vec<usize> = var.dimensions().iter().map(|dim| dim.len()).collect();

        let mut var_attrs = HashMap::new();
        for attr in var.attributes() {
            let value = convert_attribute(&attr)?;
            var_attrs.insert(attr.name().to_string(), value);
        }

        // Only a 1-D variable over its own dimension is a coordinate variable. A scalar
        // or multi-dimensional variable that merely shares a dimension's name cannot be
        // read as a 1-D coordinate vector.
        let is_coordinate = var_dims.len() == 1
            && var_dims[0] == var_name
            && file.dimension(&var_name).is_some();

        variables.insert(
            var_name.clone(),
            Variable {
                name: var_name.clone(),
                dimensions: var_dims,
                shape: var_shape,
                attributes: var_attrs,
                dtype: format!("{:?}", var.vartype()),
            },
        );

        if is_coordinate {
            let coord_values = extract_coordinate_values(&var)?;
            coordinates.insert(var_name, coord_values);
        }
    }

    // Dimensions without a usable coordinate variable fall back to their indices.
    for (dim_name, dimension) in &dimensions {
        if !coordinates.contains_key(dim_name) {
            let coord_values: Vec<f64> = (0..dimension.size).map(|i| i as f64).collect();
            coordinates.insert(dim_name.clone(), coord_values);
            warn!("Created default coordinates for dimension: {}", dim_name);
        }
    }

    Ok(Metadata {
        global_attributes,
        dimensions,
        variables,
        coordinates,
    })
}
/// Returns `true` when the variable's element type is one that
/// `convert_variable_to_array` can read: a signed 8/16/32/64-bit integer or a
/// 32/64-bit float.
///
/// Character variables are deliberately not accepted. `convert_variable_to_array` has no
/// arm for them and returns an error, so accepting them here would make a single char
/// variable fail the whole load instead of being skipped.
fn is_supported_variable(var: &NetCDFVariable) -> bool {
    use netcdf::types::{FloatType, IntType, NcVariableType};

    matches!(
        var.vartype(),
        NcVariableType::Int(IntType::I8)
            | NcVariableType::Int(IntType::I16)
            | NcVariableType::Int(IntType::I32)
            | NcVariableType::Int(IntType::I64)
            | NcVariableType::Float(FloatType::F32)
            | NcVariableType::Float(FloatType::F64)
    )
}
/// Converts a NetCDF attribute into the crate's [`AttributeValue`].
///
/// String attributes become `Text`; every scalar numeric attribute becomes `Number`.
/// Any remaining variant (for example array-valued attributes) is stored as `Text`
/// holding its `Debug` rendering.
///
/// # Errors
///
/// Returns an error if the attribute value cannot be read from the file.
fn convert_attribute(attr: &Attribute) -> Result<AttributeValue> {
    use netcdf::AttributeValue as NcAttributeValue;

    let converted = match attr.value()? {
        NcAttributeValue::Str(s) => AttributeValue::Text(s),
        NcAttributeValue::Uchar(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Schar(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Ushort(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Short(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Uint(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Int(v) => AttributeValue::Number(f64::from(v)),
        // 64-bit integers have no lossless conversion to f64; magnitudes above 2^53
        // are rounded.
        NcAttributeValue::Ulonglong(v) => AttributeValue::Number(v as f64),
        NcAttributeValue::Longlong(v) => AttributeValue::Number(v as f64),
        NcAttributeValue::Float(v) => AttributeValue::Number(f64::from(v)),
        NcAttributeValue::Double(v) => AttributeValue::Number(v),
        other => AttributeValue::Text(format!("{:?}", other)),
    };
    Ok(converted)
}
fn extract_coordinate_values(var: &NetCDFVariable) -> Result<Vec<f64>> {
use netcdf::types::{FloatType, IntType, NcVariableType};
let dim_size = var.dimensions()[0].len();
let mut values = Vec::with_capacity(dim_size);
match var.vartype() {
NcVariableType::Int(IntType::I8) => {
for i in 0..dim_size {
let index = [i]; let value: i8 = var.get_value(index)?;
values.push(value as f64);
}
}
NcVariableType::Int(IntType::I16) => {
for i in 0..dim_size {
let index = [i];
let value: i16 = var.get_value(index)?;
values.push(value as f64);
}
}
NcVariableType::Int(IntType::I32) => {
for i in 0..dim_size {
let index = [i];
let value: i32 = var.get_value(index)?;
values.push(value as f64);
}
}
NcVariableType::Int(IntType::I64) => {
for i in 0..dim_size {
let index = [i];
let value: i64 = var.get_value(index)?;
values.push(value as f64);
}
}
NcVariableType::Float(FloatType::F32) => {
for i in 0..dim_size {
let index = [i];
let value: f32 = var.get_value(index)?;
values.push(value as f64);
}
}
NcVariableType::Float(FloatType::F64) => {
for i in 0..dim_size {
let index = [i];
let value: f64 = var.get_value(index)?;
values.push(value);
}
}
_ => {
for i in 0..dim_size {
values.push(i as f64);
}
warn!(
"Unsupported coordinate variable type: {:?}, using indices instead",
var.vartype()
);
}
}
Ok(values)
}
/// Reads the values of every variable described in `metadata` into an `f32` array.
///
/// Variables that are absent from `file`, or whose type `is_supported_variable`
/// rejects, are left out of the returned map. Each array takes its shape from the
/// variable's metadata entry.
///
/// # Errors
///
/// Propagates any error from `convert_variable_to_array`.
fn extract_data(
    file: &netcdf::File,
    metadata: &Metadata,
) -> Result<HashMap<String, Array<f32, IxDyn>>> {
    let mut arrays = HashMap::new();
    for (name, described) in &metadata.variables {
        let var = match file.variable(name) {
            Some(var) if is_supported_variable(&var) => var,
            _ => continue,
        };
        let array = convert_variable_to_array(&var, &described.shape)?;
        arrays.insert(name.clone(), array);
    }
    Ok(arrays)
}
fn convert_variable_to_array(var: &NetCDFVariable, shape: &[usize]) -> Result<Array<f32, IxDyn>> {
use netcdf::types::{FloatType, IntType, NcVariableType};
let dim = Dim(shape.to_vec());
let total_elements = shape.iter().product();
let mut data = Vec::with_capacity(total_elements);
let mut indices = vec![0; shape.len()];
match var.vartype() {
NcVariableType::Int(IntType::I8) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: i8 = var.get_value(&index_array[..shape.len()])?;
data.push(value as f32);
}
}
NcVariableType::Int(IntType::I16) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: i16 = var.get_value(&index_array[..shape.len()])?;
data.push(value as f32);
}
}
NcVariableType::Int(IntType::I32) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: i32 = var.get_value(&index_array[..shape.len()])?;
data.push(value as f32);
}
}
NcVariableType::Int(IntType::I64) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: i64 = var.get_value(&index_array[..shape.len()])?;
data.push(value as f32);
}
}
NcVariableType::Float(FloatType::F32) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: f32 = var.get_value(&index_array[..shape.len()])?;
data.push(value);
}
}
NcVariableType::Float(FloatType::F64) => {
let mut index_array = [0; 10];
for i in 0..total_elements {
compute_indices(&mut indices, i, shape);
index_array[..shape.len()].copy_from_slice(&indices[..shape.len()]);
let value: f64 = var.get_value(&index_array[..shape.len()])?;
data.push(value as f32);
}
}
_ => {
return Err(RossbyError::NetCdf {
message: format!("Unsupported variable type: {:?}", var.vartype()),
})
}
}
let array = Array::from_shape_vec(dim, data)?;
Ok(array)
}
/// Converts `flat_index` into a multi-dimensional index for an array of the given
/// `shape`, writing one component per axis into `indices`.
///
/// The last axis varies fastest. Each component is taken modulo its axis length, so a
/// flat index past the end wraps on the leading axis. An empty `shape` leaves `indices`
/// untouched.
///
/// # Panics
///
/// Panics if any axis length is zero or if `indices` is shorter than `shape`.
fn compute_indices(indices: &mut [usize], flat_index: usize, shape: &[usize]) {
    let mut rest = flat_index;
    let mut axis = shape.len();
    // Peel off components starting from the fastest-varying (last) axis.
    while axis > 0 {
        axis -= 1;
        let extent = shape[axis];
        indices[axis] = rest % extent;
        rest /= extent;
    }
}
/// Test helper: writes a small NetCDF file to `path`.
///
/// The file has two global attributes, three dimensions (`lon`, `lat`, `time`) of
/// length 2, an `f64` coordinate variable per dimension holding `[0.0, 1.0]`, and an
/// `f32` variable `temperature(time, lat, lon)` whose element at `[t, y, x]` equals its
/// flat position. After syncing, the file is reopened and its dimensions and variables
/// are printed.
#[cfg(test)]
fn create_test_netcdf_file(path: &Path) -> Result<()> {
    let mut file = netcdf::create(path)?;
    file.add_attribute("title", "Rossby Test File")?;
    file.add_attribute("source", "test")?;

    let n_lon = 2;
    let n_lat = 2;
    let n_time = 2;
    file.add_dimension("lon", n_lon)?;
    file.add_dimension("lat", n_lat)?;
    file.add_dimension("time", n_time)?;

    // One coordinate variable per dimension, each holding the values 0.0 and 1.0.
    let axes = [
        ("lon", "degrees_east"),
        ("lat", "degrees_north"),
        ("time", "days since 2000-01-01"),
    ];
    for (axis, units) in axes {
        let mut coord_var = file.add_variable::<f64>(axis, &[axis])?;
        coord_var.put_attribute("units", units)?;
        coord_var.put_value(0.0, [0])?;
        coord_var.put_value(1.0, [1])?;
    }

    {
        let mut temp_var = file.add_variable::<f32>("temperature", &["time", "lat", "lon"])?;
        temp_var.put_attribute("units", "K")?;
        temp_var.put_attribute("long_name", "Temperature")?;
        // Walk the elements in flat order; each value is its own flat position.
        for flat in 0..n_time * n_lat * n_lon {
            let t = flat / (n_lat * n_lon);
            let y = (flat / n_lon) % n_lat;
            let x = flat % n_lon;
            temp_var.put_value(flat as f32, [t, y, x])?;
        }
    }
    file.sync()?;

    // Reopen the file and dump what was written, to help diagnose failing tests.
    let written = netcdf::open(path)?;
    println!("TEST FILE CREATED with dimensions:");
    for dim in written.dimensions() {
        println!(" Dimension '{}' has size {}", dim.name(), dim.len());
    }
    println!("TEST FILE VARIABLES:");
    for var in written.variables() {
        println!(
            " Variable '{}' dimensions: {:?}",
            var.name(),
            var.dimensions()
        );
        match var.get_values::<f32, _>(&[] as &[netcdf::Extent]) {
            Ok(values) => println!(" Values (as f32): {:?}", values),
            Err(_) => {
                if let Ok(values) = var.get_values::<f64, _>(&[] as &[netcdf::Extent]) {
                    println!(" Values (as f64): {:?}", values);
                }
            }
        }
    }
    Ok(())
}
/// Checks that `metadata` and `data` describe the same, internally consistent dataset.
///
/// The checks are:
/// * at least one variable exists;
/// * every dimension a variable references is a known dimension;
/// * every variable has a data array whose rank and per-axis sizes match the sizes of
///   the dimensions it references;
/// * every dimension has coordinate values.
///
/// # Errors
///
/// Returns `RossbyError::DataNotFound` describing the first violated check.
fn validate_netcdf_data(
    metadata: &Metadata,
    data: &HashMap<String, Array<f32, IxDyn>>,
) -> Result<()> {
    let not_found = |message: String| RossbyError::DataNotFound { message };

    if metadata.variables.is_empty() {
        return Err(not_found("No variables found in NetCDF file".to_string()));
    }

    for (var_name, var) in &metadata.variables {
        let unknown_dim = var
            .dimensions
            .iter()
            .find(|dim_name| !metadata.dimensions.contains_key(*dim_name));
        if let Some(dim_name) = unknown_dim {
            return Err(not_found(format!(
                "Variable {} references non-existent dimension {}",
                var_name, dim_name
            )));
        }

        let array = match data.get(var_name) {
            Some(array) => array,
            None => {
                return Err(not_found(format!(
                    "Data array for variable {} not found",
                    var_name
                )))
            }
        };

        let shape = array.shape();
        if shape.len() != var.dimensions.len() {
            return Err(not_found(format!(
                "Variable {} has inconsistent dimensions: metadata has {}, data has {}",
                var_name,
                var.dimensions.len(),
                shape.len()
            )));
        }

        // Ranks are equal here, so zipping pairs every axis with its dimension name.
        for (dim_name, &actual_size) in var.dimensions.iter().zip(shape) {
            let expected_size = metadata.dimensions[dim_name].size;
            if actual_size != expected_size {
                return Err(not_found(format!(
                    "Variable {} dimension {} has inconsistent size: expected {}, got {}",
                    var_name, dim_name, expected_size, actual_size
                )));
            }
        }
    }

    let uncovered_dim = metadata
        .dimensions
        .keys()
        .find(|dim_name| !metadata.coordinates.contains_key(*dim_name));
    if let Some(dim_name) = uncovered_dim {
        return Err(not_found(format!(
            "Coordinate values for dimension {} not found",
            dim_name
        )));
    }

    Ok(())
}
/// Unit tests for the NetCDF loader. Most tests write a small file into a temporary
/// directory and read it back through `load_netcdf_file`.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Loads an optional real-world fixture and checks its dimensions, coordinates and
    /// first data value. The test returns early (passing) when the fixture is absent.
    #[test]
    fn test_real_climate_data() -> Result<()> {
        let file_path = Path::new("tests/fixtures/2m_temperature_1982_5.625deg.nc");
        if !file_path.exists() {
            println!("Skipping test_real_climate_data as fixture file is not present");
            return Ok(());
        }
        println!("Loading real climate data from: {}", file_path.display());
        let (metadata, data) = load_netcdf_file(file_path)?;

        assert!(metadata.dimensions.contains_key("time"));
        assert!(metadata.dimensions.contains_key("lat"));
        assert!(metadata.dimensions.contains_key("lon"));
        assert_eq!(metadata.dimensions["time"].size, 53);
        assert_eq!(metadata.dimensions["lat"].size, 32);
        assert_eq!(metadata.dimensions["lon"].size, 64);

        assert!(metadata.variables.contains_key("t2m"));
        assert!(metadata.variables.contains_key("lat"));
        assert!(metadata.variables.contains_key("lon"));
        assert!(metadata.variables.contains_key("time"));

        assert!(metadata.coordinates.contains_key("lat"));
        assert!(metadata.coordinates.contains_key("lon"));
        assert!(metadata.coordinates.contains_key("time"));
        assert_eq!(metadata.coordinates["lat"].len(), 32);
        assert_eq!(metadata.coordinates["lon"].len(), 64);
        assert_eq!(metadata.coordinates["time"].len(), 53);
        assert_eq!(metadata.coordinates["lat"][0], -87.1875);
        assert_eq!(metadata.coordinates["lon"][0], 0.0);

        assert!(data.contains_key("t2m"));
        assert!(data.contains_key("lat"));
        assert!(data.contains_key("lon"));
        assert!(data.contains_key("time"));
        let t2m_data = &data["t2m"];
        assert_eq!(t2m_data.shape(), &[53, 32, 64]);

        let first_value = t2m_data[[0, 0, 0]];
        let expected_value = 253.80;
        assert!(
            (first_value - expected_value).abs() < 0.01,
            "First value {} should be close to expected {}",
            first_value,
            expected_value
        );
        println!("Real climate data loaded and verified successfully");
        Ok(())
    }

    /// Round-trips a minimal one-variable file through the `netcdf` crate directly.
    ///
    /// Per-value writes must succeed and the values read back must match. The bulk
    /// write/read calls with an empty extent list are only probed: their outcome is
    /// printed but not required.
    #[test]
    fn test_basic_netcdf() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let dir = tempdir()?;
        let file_path = dir.path().join("minimal_test.nc");
        println!("Creating a minimal NetCDF file at: {}", file_path.display());
        let mut file = netcdf::create(&file_path)?;
        println!("Adding dimension 'x' with size 2");
        let _x_dim = file.add_dimension("x", 2)?;
        println!("Adding variable 'data' with dimension 'x'");
        let mut var = file.add_variable::<f32>("data", &["x"])?;

        println!("METHOD 1: Using empty extents array");
        let data = vec![1.0f32, 2.0f32];
        match var.put_values(&data, &[] as &[netcdf::Extent]) {
            Ok(_) => println!("SUCCESS: Method 1 worked"),
            Err(e) => println!("FAILED: Method 1 error: {}", e),
        }

        // Per-value writes are what the loader's other tests rely on, so a failure here
        // must fail the test instead of only being printed.
        println!("METHOD 3: Writing one value at a time");
        var.put_value(1.0f32, [0])?;
        println!("SUCCESS: Method 3a worked (first value)");
        var.put_value(2.0f32, [1])?;
        println!("SUCCESS: Method 3b worked (second value)");

        println!("Syncing file");
        file.sync()?;

        println!("\nReading file back");
        let file = netcdf::open(&file_path)?;
        println!("Checking dimensions:");
        for dim in file.dimensions() {
            println!(" Dimension '{}' size: {}", dim.name(), dim.len());
        }
        println!("Checking variables:");
        for var in file.variables() {
            println!(
                " Variable '{}' dimensions: {:?}",
                var.name(),
                var.dimensions()
            );
            match var.get_values::<f32, _>(&[] as &[netcdf::Extent]) {
                Ok(values) => println!(" Values: {:?}", values),
                Err(e) => println!(" Error reading values: {}", e),
            }
        }

        // Verify the round trip instead of only printing it.
        let x_dim = file.dimension("x").expect("dimension 'x' was just written");
        assert_eq!(x_dim.len(), 2);
        let data_var = file.variable("data").expect("variable 'data' was just written");
        let first: f32 = data_var.get_value([0])?;
        let second: f32 = data_var.get_value([1])?;
        assert_eq!(first, 1.0);
        assert_eq!(second, 2.0);
        Ok(())
    }

    /// A missing path must be reported as an I/O `NotFound` error.
    #[test]
    fn test_file_not_found() {
        let result = load_netcdf_file(Path::new("/nonexistent/file.nc"));
        assert!(result.is_err());
        match result.unwrap_err() {
            RossbyError::Io(e) => assert_eq!(e.kind(), std::io::ErrorKind::NotFound),
            _ => panic!("Expected IO error"),
        }
    }

    /// Loads the generated test file and checks metadata, coordinates and data values.
    #[test]
    fn test_netcdf_loading() -> Result<()> {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.nc");
        create_test_netcdf_file(&file_path)?;
        let (metadata, data) = load_netcdf_file(&file_path)?;

        assert!(metadata.global_attributes.contains_key("title"));
        assert!(metadata.dimensions.contains_key("lon"));
        assert!(metadata.dimensions.contains_key("lat"));
        assert!(metadata.dimensions.contains_key("time"));
        assert!(metadata.variables.contains_key("temperature"));
        assert!(metadata.coordinates.contains_key("lon"));

        assert_eq!(metadata.dimensions["lon"].size, 2);
        assert_eq!(metadata.dimensions["lat"].size, 2);
        assert_eq!(metadata.dimensions["time"].size, 2);
        assert_eq!(metadata.variables["temperature"].dimensions.len(), 3);

        assert_eq!(metadata.coordinates["lon"], vec![0.0, 1.0]);
        assert_eq!(metadata.coordinates["lat"], vec![0.0, 1.0]);
        assert_eq!(metadata.coordinates["time"], vec![0.0, 1.0]);

        assert!(data.contains_key("temperature"));
        let temp_data = &data["temperature"];
        assert_eq!(temp_data.shape(), &[2, 2, 2]);
        assert_eq!(temp_data[[0, 0, 0]], 0.0);
        assert_eq!(temp_data[[0, 0, 1]], 1.0);
        assert_eq!(temp_data[[0, 1, 0]], 2.0);
        Ok(())
    }

    /// Text attributes, both global and per-variable, must be loaded as `Text`.
    #[test]
    fn test_attribute_conversion() -> Result<()> {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test_attr.nc");
        println!("Creating test NetCDF file for attribute conversion test");
        create_test_netcdf_file(&file_path)?;
        println!("Test file created successfully");
        println!("Loading NetCDF file for attribute test");
        let (metadata, _) = load_netcdf_file(&file_path)?;
        println!("File loaded successfully");

        println!("Global attributes: {:?}", metadata.global_attributes.keys());
        for (k, v) in &metadata.global_attributes {
            println!(" Global attribute '{}': {:?}", k, v);
        }
        println!("Variables: {:?}", metadata.variables.keys());
        for (name, var) in &metadata.variables {
            println!(
                " Variable '{}' attributes: {:?}",
                name,
                var.attributes.keys()
            );
        }

        match &metadata.global_attributes["title"] {
            AttributeValue::Text(text) => {
                println!("Title attribute value: {}", text);
                assert_eq!(text, "Rossby Test File");
            }
            _ => panic!("Expected Text attribute"),
        }
        match &metadata.variables["temperature"].attributes["units"] {
            AttributeValue::Text(text) => {
                println!("Temperature units attribute value: {}", text);
                assert_eq!(text, "K");
            }
            _ => panic!("Expected Text attribute"),
        }
        match &metadata.variables["temperature"].attributes["long_name"] {
            AttributeValue::Text(text) => {
                println!("Temperature long_name attribute value: {}", text);
                assert_eq!(text, "Temperature");
            }
            _ => panic!("Expected Text attribute"),
        }
        Ok(())
    }

    /// The generated test file must pass `validate_netcdf_data`.
    #[test]
    fn test_validation() -> Result<()> {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test_valid.nc");
        println!("Creating test NetCDF file for validation test");
        create_test_netcdf_file(&file_path)?;
        println!("Test file created successfully");
        println!("Loading NetCDF file for validation test");
        let (metadata, data) = load_netcdf_file(&file_path)?;
        println!("File loaded successfully");

        println!("Metadata dimensions: {:?}", metadata.dimensions.keys());
        println!("Metadata variables: {:?}", metadata.variables.keys());
        println!("Metadata coordinates: {:?}", metadata.coordinates.keys());
        println!("Data variables: {:?}", data.keys());

        println!("Running validation...");
        let validation_result = validate_netcdf_data(&metadata, &data);
        if let Err(e) = &validation_result {
            println!("Validation failed: {:?}", e);
        } else {
            println!("Validation passed");
        }
        assert!(validation_result.is_ok());
        Ok(())
    }

    /// A variable without dimensions must load as a rank-0 array holding one value.
    #[test]
    fn test_scalar_variable_loading() -> Result<()> {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("scalar.nc");
        let mut file = netcdf::create(&file_path)?;
        file.add_attribute("title", "Scalar Test File")?;
        {
            let mut scalar_var = file.add_variable::<f32>("offset", &[])?;
            scalar_var.put_attribute("units", "K")?;
            scalar_var.put_value(42.5f32, ())?;
        }
        file.sync()?;

        let (metadata, data) = load_netcdf_file(&file_path)?;
        validate_netcdf_data(&metadata, &data)?;

        let offset_meta = metadata.variables.get("offset").unwrap();
        assert!(offset_meta.dimensions.is_empty());
        assert!(offset_meta.shape.is_empty());

        let offset_data = data.get("offset").unwrap();
        assert_eq!(offset_data.shape(), &[] as &[usize]);
        assert_eq!(offset_data.iter().copied().collect::<Vec<_>>(), vec![42.5]);
        Ok(())
    }
}