tensogram-netcdf 0.15.0

// (C) Copyright 2026- ECMWF and individual contributors.
//
// This software is licensed under the terms of the Apache Licence Version 2.0
// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
// In applying this licence, ECMWF does not waive the privileges and immunities
// granted to it by virtue of its status as an intergovernmental organisation nor
// does it submit to any jurisdiction.

// Integration tests for tensogram-netcdf.
//
// Fixture files live in testdata/ — generated by testdata/generate.py.
// Run `python testdata/generate.py` to regenerate them.
//
// DecodedObject = (DataObjectDescriptor, Vec<u8>)
// Access descriptor via .0, data bytes via .1

use tensogram::{DecodeOptions, Dtype, decode};
use tensogram_netcdf::{ConvertOptions, DataPipeline, NetcdfError, SplitBy, convert_netcdf_file};

// Builds the absolute path of a fixture file: `<crate manifest dir>/testdata/<name>`.
fn testdata(name: &str) -> std::path::PathBuf {
    std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("testdata")
        .join(name)
}

// Result of decoding one message: the message-level metadata followed by
// the decoded objects. This is the value `decode_first` hands back from
// `decode`.
type DecodedMsg = (
    tensogram::types::GlobalMetadata,
    Vec<tensogram::types::DecodedObject>,
);

// Decodes the first message of `msgs` with default decode options.
// Panics if `msgs` is empty (indexing) or if decoding fails.
fn decode_first(msgs: &[Vec<u8>]) -> DecodedMsg {
    let first = &msgs[0];
    let options = DecodeOptions::default();
    decode(first, &options).expect("decode failed")
}

// ── Task 6: Minimal converter ─────────────────────────────────────────────────

#[test]
fn simple_2d_produces_one_message() {
    // Default options must turn the whole file into a single message.
    let options = ConvertOptions::default();
    let messages = convert_netcdf_file(&testdata("simple_2d.nc"), &options).unwrap();
    assert_eq!(
        messages.len(),
        1,
        "SplitBy::File should produce exactly 1 message"
    );
}

#[test]
fn simple_2d_has_one_object_with_correct_shape() {
    let fixture = testdata("simple_2d.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    // One variable in the file → one object and one base metadata entry.
    assert_eq!(decoded.len(), 1, "simple_2d.nc has 1 variable 'data'");
    assert_eq!(metadata.base.len(), 1);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.shape, vec![5, 4]);
    assert_eq!(descriptor.dtype, Dtype::Float64);
}

#[test]
fn simple_2d_data_roundtrip() {
    let fixture = testdata("simple_2d.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (_, decoded) = decode_first(&encoded);

    // A 5×4 grid of f64 values occupies 20 * 8 = 160 bytes.
    let payload = &decoded[0].1;
    let expected_len = 5 * 4 * std::mem::size_of::<f64>();
    assert_eq!(payload.len(), expected_len);
}

// ── Task 7: Dtype matrix ──────────────────────────────────────────────────────

#[test]
fn multi_dtype_preserves_native_dtypes() {
    let fixture = testdata("multi_dtype.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    // Map each named variable to the dtype of the object at the same index.
    let dtype_by_name: std::collections::HashMap<String, Dtype> = decoded
        .iter()
        .enumerate()
        .filter_map(|(idx, object)| match metadata.base[idx].get("name") {
            Some(ciborium::Value::Text(name)) => Some((name.clone(), object.0.dtype)),
            _ => None,
        })
        .collect();

    // Variables without scale_factor/add_offset should preserve native dtype
    let expected = [
        ("i8", Dtype::Int8),
        ("i16", Dtype::Int16),
        ("i32", Dtype::Int32),
        ("i64", Dtype::Int64),
        ("u8", Dtype::Uint8),
        ("u16", Dtype::Uint16),
        ("u32", Dtype::Uint32),
        ("u64", Dtype::Uint64),
        ("f32", Dtype::Float32),
        ("f64", Dtype::Float64),
    ];
    for (variable, dtype) in expected {
        assert_eq!(dtype_by_name.get(variable), Some(&dtype));
    }
}

// ── Task 8: Metadata extraction ───────────────────────────────────────────────

#[test]
fn cf_temperature_has_netcdf_metadata() {
    let fixture = testdata("cf_temperature.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // Locate the base entry whose "name" is the text "temperature".
    let temperature = metadata.base.iter().find(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "temperature"
        )
    });
    assert!(
        temperature.is_some(),
        "temperature variable should be in base"
    );

    let netcdf_value = temperature
        .unwrap()
        .get("netcdf")
        .expect("should have netcdf metadata");
    assert!(
        matches!(netcdf_value, ciborium::Value::Map(_)),
        "netcdf metadata should be a map"
    );
}

#[test]
fn cf_temperature_no_cf_key_without_flag() {
    let fixture = testdata("cf_temperature.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // With default options no base entry may carry a "cf" key.
    let any_cf_entry = metadata.base.iter().any(|entry| entry.contains_key("cf"));
    assert!(!any_cf_entry, "no 'cf' key without --cf flag");
}

#[test]
fn cf_temperature_has_cf_key_with_flag() {
    let fixture = testdata("cf_temperature.nc");
    let options = ConvertOptions {
        cf: true,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // Locate the base entry whose "name" is the text "temperature".
    let temperature = metadata.base.iter().find(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "temperature"
        )
    });
    assert!(temperature.is_some());

    let has_cf = temperature.unwrap().contains_key("cf");
    assert!(has_cf, "temperature should have 'cf' key with --cf flag");
}

// ── Task 9: Packed data + fill values ─────────────────────────────────────────

#[test]
fn cf_temperature_unpacks_to_f64() {
    let fixture = testdata("cf_temperature.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (_, decoded) = decode_first(&encoded);

    // temperature has scale_factor/add_offset → should be unpacked to f64.
    // The check only requires that at least one decoded object is Float64.
    let float64_objects = decoded
        .iter()
        .filter(|object| object.0.dtype == Dtype::Float64)
        .count();
    assert!(
        float64_objects > 0,
        "packed variable should be unpacked to f64"
    );
}

#[test]
fn multi_dtype_nan_values_in_f64_with_nan() {
    let fixture = testdata("multi_dtype.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    let position = metadata.base.iter().position(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "f64_with_nan"
        )
    });

    // If the variable is not present in the decoded metadata the test
    // passes without checking anything.
    let Some(idx) = position else {
        return;
    };

    // Reinterpret the payload as little-endian f64 values and look for a NaN.
    let contains_nan = decoded[idx]
        .1
        .chunks_exact(8)
        .map(|chunk| f64::from_le_bytes(chunk.try_into().unwrap()))
        .any(f64::is_nan);
    assert!(contains_nan, "f64_with_nan should contain NaN values");
}

// ── Task 10: --split-by grouping ──────────────────────────────────────────────

#[test]
fn split_by_file_produces_one_message() {
    // Explicit file-level split: the whole file becomes one message.
    let options = ConvertOptions {
        split_by: SplitBy::File,
        ..ConvertOptions::default()
    };
    let messages = convert_netcdf_file(&testdata("multi_var.nc"), &options).unwrap();
    assert_eq!(messages.len(), 1);
}

#[test]
fn split_by_variable_produces_one_message_per_variable() {
    let fixture = testdata("multi_var.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Variable,
        ..ConvertOptions::default()
    };
    let messages = convert_netcdf_file(&fixture, &options).unwrap();

    // multi_var.nc has temperature, humidity, pressure (char var is skipped)
    let message_count = messages.len();
    assert!(
        message_count >= 3,
        "should have at least 3 messages for 3 numeric vars"
    );

    // Each message should have exactly 1 object
    let decode_options = DecodeOptions::default();
    for message in &messages {
        let (_, decoded) = decode(message, &decode_options).unwrap();
        assert_eq!(
            decoded.len(),
            1,
            "each message should have 1 object in variable mode"
        );
    }
}

#[test]
fn split_by_record_produces_one_message_per_record() {
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let messages = convert_netcdf_file(&testdata("unlimited_time.nc"), &options).unwrap();

    // unlimited_time.nc has 5 timesteps
    let record_count = messages.len();
    assert_eq!(
        record_count, 5,
        "should produce 5 messages for 5 time records"
    );
}

#[test]
fn split_by_record_errors_without_unlimited_dim() {
    // Record split is requested on simple_2d.nc; the conversion is expected
    // to fail with `NoUnlimitedDimension`.
    let fixture = testdata("simple_2d.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let outcome = convert_netcdf_file(&fixture, &options);
    let is_expected_error = matches!(outcome, Err(NetcdfError::NoUnlimitedDimension { .. }));
    assert!(
        is_expected_error,
        "should error when no unlimited dimension"
    );
}

// ── Task 11: --cf flag ────────────────────────────────────────────────────────

#[test]
fn cf_flag_extracts_standard_name() {
    let fixture = testdata("cf_temperature.nc");
    let options = ConvertOptions {
        cf: true,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // Locate the base entry whose "name" is the text "temperature".
    let temperature = metadata.base.iter().find(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "temperature"
        )
    });
    assert!(temperature.is_some());

    let cf_value = temperature
        .unwrap()
        .get("cf")
        .expect("should have cf key");
    let ciborium::Value::Map(cf_pairs) = cf_value else {
        panic!("cf value should be a map");
    };

    // The cf map must contain a text key equal to "standard_name".
    let has_standard_name = cf_pairs
        .iter()
        .any(|(key, _)| matches!(key, ciborium::Value::Text(text) if text == "standard_name"));
    assert!(has_standard_name, "cf map should contain standard_name");
}

// ── Task 12: Edge cases ───────────────────────────────────────────────────────

#[test]
fn empty_file_returns_no_variables_error() {
    // Converting empty_file.nc is expected to fail with `NoVariables`.
    let fixture = testdata("empty_file.nc");
    let outcome = convert_netcdf_file(&fixture, &ConvertOptions::default());
    let is_no_variables = matches!(outcome, Err(NetcdfError::NoVariables));
    assert!(
        is_no_variables,
        "empty file should return NoVariables error, got: {:?}",
        outcome
    );
}

#[test]
fn nc3_classic_converts_successfully() {
    let fixture = testdata("nc3_classic.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    assert_eq!(
        encoded.len(),
        1,
        "NC3 classic file should convert to 1 message"
    );

    // The single message must carry at least one decoded object.
    let (_, decoded) = decode_first(&encoded);
    let is_empty = decoded.is_empty();
    assert!(!is_empty, "should have at least 1 object");
}

#[test]
fn nc4_groups_warns_and_converts_root_only() {
    // nc4_groups.nc has root_var in root + predicted in sub-group
    // We should get root_var but not predicted
    let fixture = testdata("nc4_groups.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    assert_eq!(encoded.len(), 1);
    let (metadata, _) = decode_first(&encoded);

    // True when some base entry has a text "name" equal to `wanted`.
    let has_variable = |wanted: &str| {
        metadata.base.iter().any(|entry| {
            matches!(
                entry.get("name"),
                Some(ciborium::Value::Text(text)) if text == wanted
            )
        })
    };

    assert!(has_variable("root_var"), "root_var should be present");
    assert!(
        !has_variable("predicted"),
        "predicted (sub-group var) should NOT be present"
    );
}

#[test]
fn multi_var_skips_char_variables() {
    let fixture = testdata("multi_var.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // No base entry may be named "description".
    let description_entry = metadata.base.iter().find(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "description"
        )
    });
    assert!(
        description_entry.is_none(),
        "char variable 'description' should be skipped"
    );
}

#[test]
fn multi_var_has_all_numeric_variables() {
    let fixture = testdata("multi_var.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, _) = decode_first(&encoded);

    // Collect every text-valued "name" from the base entries.
    let mut names: Vec<&str> = Vec::new();
    for entry in &metadata.base {
        if let Some(ciborium::Value::Text(text)) = entry.get("name") {
            names.push(text.as_str());
        }
    }

    assert!(
        names.contains(&"temperature"),
        "temperature should be present"
    );
    assert!(names.contains(&"humidity"), "humidity should be present");
    assert!(names.contains(&"pressure"), "pressure should be present");
}

#[test]
fn scalar_variable_has_empty_shape() {
    // multi_dtype.nc has a scalar 'pi' variable (ndim=0)
    let fixture = testdata("multi_dtype.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    let position = metadata.base.iter().position(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "pi"
        )
    });

    // If pi not found, fixture may not have it — test is vacuously passing
    let Some(idx) = position else {
        return;
    };

    let descriptor = &decoded[idx].0;
    assert_eq!(descriptor.shape, vec![], "scalar should have empty shape");
    assert_eq!(descriptor.ndim, 0, "scalar should have ndim=0");
}

#[test]
fn unlimited_time_static_var_included_in_record_split() {
    // unlimited_time.nc has temp(time,y,x) and mask(y,x)
    // In record split, mask should appear in each record message
    let fixture = testdata("unlimited_time.nc");
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let messages = convert_netcdf_file(&fixture, &options).unwrap();
    assert!(!messages.is_empty());

    // Only the first record message is inspected.
    let (metadata, _) = decode(&messages[0], &DecodeOptions::default()).unwrap();
    let mask_entry = metadata.base.iter().find(|entry| {
        matches!(
            entry.get("name"),
            Some(ciborium::Value::Text(text)) if text == "mask"
        )
    });
    assert!(
        mask_entry.is_some(),
        "static variable 'mask' should appear in each record message"
    );
}

// ── Task 13b: --encoding/--filter/--compression pipeline flags ────────────────

#[test]
fn default_pipeline_stays_none() {
    // Regression test: ConvertOptions::default() must produce raw uncompressed
    // descriptors so the previous 21 tests stay byte-identical.
    let fixture = testdata("simple_2d.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.encoding, "none");
    assert_eq!(descriptor.filter, "none");
    assert_eq!(descriptor.compression, "none");

    let no_params = descriptor.params.is_empty();
    assert!(no_params, "default pipeline must not insert any params");
}

#[test]
fn simple_2d_with_simple_packing_24bit() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        encoding: String::from("simple_packing"),
        bits: Some(24),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.encoding, "simple_packing");

    // The requested bit width must be recorded as an integer param.
    let bits_param = descriptor
        .params
        .get("bits_per_value")
        .expect("bits_per_value param");
    let ciborium::Value::Integer(raw) = bits_param else {
        panic!("bits_per_value should be an integer");
    };
    assert_eq!(i128::from(*raw), 24, "bits_per_value should be 24");

    // simple_packing also requires reference_value, binary_scale_factor,
    // decimal_scale_factor — verify they're present.
    assert!(descriptor.params.contains_key("reference_value"));
    assert!(descriptor.params.contains_key("binary_scale_factor"));
    assert!(descriptor.params.contains_key("decimal_scale_factor"));
}

#[test]
fn simple_2d_with_shuffle_plus_zstd() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        filter: String::from("shuffle"),
        compression: String::from("zstd"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.filter, "shuffle");
    assert_eq!(descriptor.compression, "zstd");

    // shuffle_element_size should be 8 for raw float64 data.
    let size_param = descriptor
        .params
        .get("shuffle_element_size")
        .expect("shuffle_element_size param");
    let ciborium::Value::Integer(raw_size) = size_param else {
        panic!("shuffle_element_size should be an integer");
    };
    assert_eq!(i128::from(*raw_size), 8);

    // Default zstd_level = 3.
    let level_param = descriptor
        .params
        .get("zstd_level")
        .expect("zstd_level param");
    let ciborium::Value::Integer(raw_level) = level_param else {
        panic!("zstd_level should be an integer");
    };
    assert_eq!(i128::from(*raw_level), 3);
}

#[test]
fn simple_2d_with_zstd_custom_level() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        compression: String::from("zstd"),
        compression_level: Some(7),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    // The explicit compression level must be recorded as `zstd_level`.
    let level_param = decoded[0]
        .0
        .params
        .get("zstd_level")
        .expect("zstd_level param");
    let ciborium::Value::Integer(raw_level) = level_param else {
        panic!("zstd_level should be an integer");
    };
    assert_eq!(i128::from(*raw_level), 7);
}

#[test]
fn simple_2d_with_lz4_compression() {
    let pipeline = DataPipeline {
        compression: String::from("lz4"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&testdata("simple_2d.nc"), &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    // The descriptor must report the requested compression.
    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.compression, "lz4");
}

#[test]
fn simple_2d_with_szip_compression() {
    // szip only supports ≤32-bit samples, so it must be combined with
    // simple_packing or shuffle (both reduce the per-sample bit width).
    // Here we use simple_packing 16-bit which puts bits_per_sample at 16.
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        encoding: String::from("simple_packing"),
        bits: Some(16),
        compression: String::from("szip"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.compression, "szip");

    // The three szip params must all be present.
    let params = &descriptor.params;
    assert!(params.contains_key("szip_rsi"));
    assert!(params.contains_key("szip_block_size"));
    assert!(params.contains_key("szip_flags"));
}

#[test]
fn simple_2d_with_shuffle_szip_combo() {
    // Alternative szip path: shuffle reduces bits_per_sample to 8.
    let pipeline = DataPipeline {
        filter: String::from("shuffle"),
        compression: String::from("szip"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&testdata("simple_2d.nc"), &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.filter, "shuffle");
    assert_eq!(descriptor.compression, "szip");
}

#[test]
fn simple_2d_with_blosc2_compression() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        compression: String::from("blosc2"),
        compression_level: Some(9),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.compression, "blosc2");

    // The explicit compression level must be recorded as `blosc2_clevel`.
    let clevel_param = descriptor
        .params
        .get("blosc2_clevel")
        .expect("blosc2_clevel param");
    let ciborium::Value::Integer(raw_clevel) = clevel_param else {
        panic!("blosc2_clevel should be an integer");
    };
    assert_eq!(i128::from(*raw_clevel), 9);
}

#[test]
fn unknown_compression_errors() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        compression: String::from("bogus"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let outcome = convert_netcdf_file(&fixture, &options);
    assert!(outcome.is_err(), "unknown compression should error");

    // The rendered error must name the offending compression.
    let msg = outcome.unwrap_err().to_string();
    let mentions_name = msg.contains("bogus");
    assert!(
        mentions_name,
        "error should mention 'bogus', got: {msg}"
    );
}

#[test]
fn unknown_encoding_errors() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        encoding: String::from("magic_packing"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let outcome = convert_netcdf_file(&fixture, &options);
    assert!(outcome.is_err(), "unknown encoding should error");

    // The rendered error must name the offending encoding.
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("magic_packing"));
}

#[test]
fn unknown_filter_errors() {
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        filter: String::from("wibble"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let outcome = convert_netcdf_file(&fixture, &options);
    assert!(outcome.is_err(), "unknown filter should error");

    // The rendered error must name the offending filter.
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("wibble"));
}

#[test]
fn simple_packing_skips_non_f64_variables() {
    // multi_dtype.nc has int8/u16/f32/f64 variables. simple_packing should
    // be applied to f64 only and silently passed through (with a stderr
    // warning) for the others — the file conversion as a whole succeeds.
    let fixture = testdata("multi_dtype.nc");
    let pipeline = DataPipeline {
        encoding: String::from("simple_packing"),
        bits: Some(16),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    // Find the "f64" variable and verify it has simple_packing.
    // Find an integer variable and verify it does NOT.
    let mut saw_packed_f64 = false;
    let mut saw_unpacked_int = false;
    for (idx, object) in decoded.iter().enumerate() {
        // Entries without a text "name" are ignored.
        let Some(ciborium::Value::Text(text)) = metadata.base[idx].get("name") else {
            continue;
        };
        let name = text.as_str();
        match name {
            "f64" => {
                assert_eq!(
                    object.0.encoding, "simple_packing",
                    "f64 variable should be simple_packed"
                );
                saw_packed_f64 = true;
            }
            "i32" | "i16" | "i8" => {
                assert_eq!(
                    object.0.encoding, "none",
                    "{name} (non-f64) should NOT be simple_packed"
                );
                saw_unpacked_int = true;
            }
            _ => {}
        }
    }
    assert!(saw_packed_f64, "should have seen the f64 variable");
    assert!(saw_unpacked_int, "should have seen an int variable");
}

#[test]
fn pipeline_round_trip_zstd_decodes_back_to_input() {
    // simple_2d.nc has 5×4 = 20 f64 values. Encode with zstd and decode
    // back — the recovered bytes must be byte-identical to the default
    // (no-pipeline) output.
    let fixture = testdata("simple_2d.nc");

    // Reference payload: default options, no pipeline.
    let plain_msgs = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (_, plain_objects) = decode_first(&plain_msgs);

    // Same file again, this time compressed with zstd.
    let pipeline = DataPipeline {
        compression: String::from("zstd"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let compressed_msgs = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, compressed_objects) = decode_first(&compressed_msgs);

    assert_eq!(
        compressed_objects[0].1, plain_objects[0].1,
        "zstd round-trip should recover the original payload bytes exactly"
    );
}

#[test]
fn simple_packing_plus_shuffle_uses_post_encoding_element_size() {
    // Combining simple_packing + shuffle exercises the branch in
    // apply_pipeline that computes shuffle_element_size from the
    // post-encoding byte width (⌈bpv/8⌉), not the native dtype width.
    let fixture = testdata("simple_2d.nc");
    let pipeline = DataPipeline {
        encoding: String::from("simple_packing"),
        bits: Some(16),
        filter: String::from("shuffle"),
        ..DataPipeline::default()
    };
    let options = ConvertOptions {
        pipeline,
        ..ConvertOptions::default()
    };
    let encoded = convert_netcdf_file(&fixture, &options).unwrap();
    let (_, decoded) = decode_first(&encoded);

    let descriptor = &decoded[0].0;
    assert_eq!(descriptor.encoding, "simple_packing");
    assert_eq!(descriptor.filter, "shuffle");

    // With bits=16, element_size should be ⌈16/8⌉ = 2, not the native
    // f64 width of 8.
    let size_param = descriptor
        .params
        .get("shuffle_element_size")
        .expect("shuffle_element_size");
    let ciborium::Value::Integer(raw_size) = size_param else {
        panic!("shuffle_element_size should be integer");
    };
    assert_eq!(
        i128::from(*raw_size),
        2,
        "element_size should be ⌈16/8⌉ = 2 post-pack"
    );
}

// ── Code coverage pass: exercise every read_native_extents dtype arm ──
//
// record_multi_dtype.nc has one variable of every supported numeric
// dtype along the unlimited `time` dimension. Record-split mode then
// forces the converter to hit each arm of `read_native_extents`.

#[test]
fn record_multi_dtype_covers_all_read_native_extents_arms() {
    let path = testdata("record_multi_dtype.nc");
    let opts = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let msgs = convert_netcdf_file(&path, &opts).unwrap();
    // 3 records → 3 messages.
    assert_eq!(msgs.len(), 3, "record_multi_dtype has 3 timesteps");

    // Inspect the first message and map every named variable to the dtype
    // of its decoded object.
    let (meta, objects) = decode_first(&msgs);

    // Pair each base entry with the object at the same position *before*
    // filtering on the name. Filtering first would compact the name list,
    // so a single entry without a text "name" would shift every later name
    // onto the wrong object.
    let by_name: std::collections::HashMap<String, Dtype> = meta
        .base
        .iter()
        .zip(objects.iter())
        .filter_map(|(entry, obj)| match entry.get("name") {
            Some(ciborium::Value::Text(name)) => Some((name.clone(), obj.0.dtype)),
            _ => None,
        })
        .collect();

    // Verify each dtype variable is present and has the expected Tensogram dtype.
    assert_eq!(by_name.get("v_i8"), Some(&Dtype::Int8));
    assert_eq!(by_name.get("v_u8"), Some(&Dtype::Uint8));
    assert_eq!(by_name.get("v_i16"), Some(&Dtype::Int16));
    assert_eq!(by_name.get("v_u16"), Some(&Dtype::Uint16));
    assert_eq!(by_name.get("v_i32"), Some(&Dtype::Int32));
    assert_eq!(by_name.get("v_u32"), Some(&Dtype::Uint32));
    assert_eq!(by_name.get("v_i64"), Some(&Dtype::Int64));
    assert_eq!(by_name.get("v_u64"), Some(&Dtype::Uint64));
    assert_eq!(by_name.get("v_f32"), Some(&Dtype::Float32));
    assert_eq!(by_name.get("v_f64"), Some(&Dtype::Float64));
}

// ── Code coverage pass: get_f64_attr non-Double arms ─────────────────
//
// attr_type_variants.nc has scaled variables whose scale_factor /
// add_offset attributes are stored as Float / Int / Short / Longlong
// (instead of the usual Double). Converting the file runs each of
// those through read_and_unpack → get_f64_attr, covering the
// AttributeValue::Float/Int/Short/Longlong arms.

#[test]
fn attr_type_variants_all_unpack_to_f64() {
    let fixture = testdata("attr_type_variants.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    // Every variable in the fixture has scale_factor → all should be
    // unpacked to Float64.
    let dtype_by_name: std::collections::HashMap<String, Dtype> = decoded
        .iter()
        .enumerate()
        .filter_map(|(idx, object)| match metadata.base[idx].get("name") {
            Some(ciborium::Value::Text(name)) => Some((name.clone(), object.0.dtype)),
            _ => None,
        })
        .collect();

    let unpacked = Some(&Dtype::Float64);
    assert_eq!(dtype_by_name.get("scaled_float"), unpacked);
    assert_eq!(dtype_by_name.get("scaled_int"), unpacked);
    assert_eq!(dtype_by_name.get("scaled_short"), unpacked);
    assert_eq!(dtype_by_name.get("scaled_longlong"), unpacked);
    assert_eq!(dtype_by_name.get("with_missing"), unpacked);
}

#[test]
fn attr_type_variants_string_scale_factor_returns_raw_data() {
    // `string_scale` has scale_factor="non_numeric" — a Str attribute
    // that get_f64_attr can't convert. The fallback `_ => None` arm
    // fires and read_and_unpack still runs (because attribute "exists")
    // but with all Option<f64>s = None, so values pass through unchanged.
    let fixture = testdata("attr_type_variants.nc");
    let encoded = convert_netcdf_file(&fixture, &ConvertOptions::default()).unwrap();
    let (metadata, decoded) = decode_first(&encoded);

    // Build the wanted name once and compare each entry's "name" against it.
    let wanted = ciborium::Value::Text("string_scale".to_string());
    let idx = metadata
        .base
        .iter()
        .position(|entry| entry.get("name") == Some(&wanted))
        .expect("string_scale variable");

    // read_and_unpack promotes the i16 values to f64 even though the
    // scale_factor is string — verify the values are [1.0, 2.0, 3.0, 4.0].
    let mut floats: Vec<f64> = Vec::new();
    for chunk in decoded[idx].1.chunks_exact(8) {
        floats.push(f64::from_le_bytes(chunk.try_into().unwrap()));
    }
    assert_eq!(floats, vec![1.0, 2.0, 3.0, 4.0]);
}

#[test]
fn empty_unlimited_record_split_returns_no_messages() {
    // empty_unlimited.nc has an unlimited dim with zero records.
    // The converter's record-split path should return Ok(vec![])
    // early without trying to iterate any records.
    let options = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let messages = convert_netcdf_file(&testdata("empty_unlimited.nc"), &options).unwrap();
    let message_count = messages.len();
    assert_eq!(
        message_count, 0,
        "zero-record file should produce zero messages"
    );
}

#[test]
fn complex_types_unlimited_skipped_in_record_split() {
    // The fixture holds value(time,n) as float and state(time,n) as an
    // enum. In record-split mode extract_variable_record sees both; the
    // enum runs into the Compound/Opaque/Enum/Vlen rejection and is
    // dropped with a warning, whereas `value` converts as usual.
    let record_split = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let msgs =
        convert_netcdf_file(&testdata("complex_types_unlimited.nc"), &record_split).unwrap();
    // Two entries along the time dimension → two messages.
    assert_eq!(msgs.len(), 2);

    for encoded in &msgs {
        let (meta, _) = decode(encoded, &DecodeOptions::default()).unwrap();
        // True when some metadata entry carries `wanted` as its textual name.
        let has_var = |wanted: &str| {
            meta.base.iter().any(|entry| {
                matches!(
                    entry.get("name"),
                    Some(ciborium::Value::Text(found)) if found == wanted
                )
            })
        };
        assert!(has_var("value"));
        assert!(
            !has_var("state"),
            "enum variable 'state' should be skipped in record split"
        );
    }
}

#[test]
fn complex_types_are_skipped_with_warning() {
    // The fixture pairs a plain float variable `value` with an
    // enum-typed variable `status`. The enum is expected to hit the
    // Compound/Opaque/Enum/Vlen rejection arm and be dropped with a
    // warning, leaving `value` as the only converted object — the
    // conversion as a whole must still succeed.
    let msgs =
        convert_netcdf_file(&testdata("complex_types.nc"), &ConvertOptions::default()).unwrap();
    let (meta, objects) = decode_first(&msgs);

    // Borrow the textual names straight out of the metadata entries.
    let names: Vec<&str> = meta
        .base
        .iter()
        .filter_map(|entry| match entry.get("name") {
            Some(ciborium::Value::Text(text)) => Some(text.as_str()),
            _ => None,
        })
        .collect();

    assert!(names.contains(&"value"));
    assert!(
        !names.contains(&"status"),
        "enum variable 'status' should be skipped"
    );
    assert_eq!(objects.len(), 1);
}

#[test]
fn record_with_char_skips_char_variable() {
    // The fixture has a numeric values(time,n) variable and a char
    // labels(time,strlen) variable on the same unlimited dimension.
    // Record-split is expected to drop the char variable (with a
    // warning) while still emitting one message per record for values.
    let record_split = ConvertOptions {
        split_by: SplitBy::Record,
        ..ConvertOptions::default()
    };
    let msgs = convert_netcdf_file(&testdata("record_with_char.nc"), &record_split).unwrap();
    // Two entries along the time dimension → two messages.
    assert_eq!(msgs.len(), 2);

    // `values` must be present in every message, `labels` in none.
    for encoded in &msgs {
        let (meta, _) = decode(encoded, &DecodeOptions::default()).unwrap();

        let mut names: Vec<String> = Vec::new();
        for entry in meta.base.iter() {
            if let Some(ciborium::Value::Text(text)) = entry.get("name") {
                names.push(text.clone());
            }
        }

        assert!(names.iter().any(|name| name == "values"));
        assert!(
            names.iter().all(|name| name != "labels"),
            "char variable 'labels' should be skipped in record split"
        );
    }
}

#[test]
fn attr_type_variants_missing_value_replaced_with_nan() {
    // `with_missing` has `missing_value=-1`; the converter reads it as
    // f64 -1.0, matches the fill sentinel, and writes NaN at those
    // positions. This explicitly exercises the NaN-substitution
    // branch in read_and_unpack.
    let path = testdata("attr_type_variants.nc");
    let msgs = convert_netcdf_file(&path, &ConvertOptions::default()).unwrap();
    let (meta, objects) = decode_first(&msgs);

    // Build the lookup key once instead of allocating it per entry.
    let wanted = ciborium::Value::Text("with_missing".to_string());
    let idx = meta
        .base
        .iter()
        .position(|e| e.get("name") == Some(&wanted))
        .expect("with_missing variable");

    // The byte reinterpretation below is only meaningful for f64 payloads,
    // so pin the descriptor dtype before decoding.
    assert_eq!(
        objects[idx].0.dtype,
        Dtype::Float64,
        "with_missing should have been unpacked to Float64"
    );
    let data = &objects[idx].1;

    // `chunks_exact` silently drops trailing bytes; reject a payload that
    // is not a whole number of 8-byte values.
    let chunks = data.chunks_exact(8);
    assert!(
        chunks.remainder().is_empty(),
        "with_missing payload is {} bytes, not a whole number of f64 values",
        data.len()
    );
    let floats: Vec<f64> = chunks
        .map(|b| f64::from_le_bytes(b.try_into().unwrap()))
        .collect();

    let nan_count = floats.iter().filter(|v| v.is_nan()).count();
    assert_eq!(
        nan_count, 2,
        "with_missing has 2 sentinel values → 2 NaNs, got {nan_count} in {floats:?}"
    );
    // The non-sentinel positions should have been scaled by
    // scale_factor=1.0 → unchanged.
    assert!(floats.contains(&5.0));
    assert!(floats.contains(&10.0));
}

#[test]
fn record_multi_dtype_records_are_independent() {
    // Record-split should produce one self-contained message per
    // record, not share state across records.
    let path = testdata("record_multi_dtype.nc");
    let opts = ConvertOptions {
        split_by: SplitBy::Record,
        ..Default::default()
    };
    let msgs = convert_netcdf_file(&path, &opts).unwrap();
    assert_eq!(msgs.len(), 3);

    // Sorted list of variable names carried by each message.
    let name_sets: Vec<Vec<String>> = msgs
        .iter()
        .map(|msg| {
            let (meta, _) = decode(msg, &DecodeOptions::default()).unwrap();
            let mut names: Vec<String> = meta
                .base
                .iter()
                .filter_map(|e| match e.get("name") {
                    Some(ciborium::Value::Text(s)) => Some(s.clone()),
                    _ => None,
                })
                .collect();
            names.sort();
            names
        })
        .collect();

    // Every message should contain the same set of variables as the first
    // one. The length assertion above guarantees index 0 exists.
    for (record, names) in name_sets.iter().enumerate() {
        // Without this guard the comparison passes vacuously when no
        // message exposes any named variable at all.
        assert!(
            !names.is_empty(),
            "record {record} exposes no named variables"
        );
        assert_eq!(
            names, &name_sets[0],
            "each record should have same var set"
        );
    }
}