#[cfg(test)]
mod diagnose_tests {
use super::*;
/// Builds an `SpcMetrics` fixture for the tests in this module.
///
/// The arguments are copied into the fields of the same name. The remaining
/// fields are filled with fixed or derived placeholder values:
/// - `token_id` is `position` cast to `u32`;
/// - `mean_abs_diff` and `rmse` are `0.5` and `0.3` times `max_abs_diff`;
/// - `kl_divergence` / `sigma_level` are `0.001` / `6.0` when
///   `cosine_similarity > 0.999`, and `5.0` / `0.5` otherwise;
/// - top logits are `1.0`, `_max_diff_idx` and `out_of_spec_count` are `0`,
///   and `vocab_size` is `32000`.
fn diagnose_metrics(
    cosine_similarity: f32,
    max_abs_diff: f32,
    cpu_argmax: u32,
    gpu_argmax: u32,
    cpu_nan: usize,
    gpu_nan: usize,
    position: usize,
) -> SpcMetrics {
    // Both distribution-level placeholders switch on the same cosine threshold.
    let (kl_divergence, sigma_level) = if cosine_similarity > 0.999 {
        (0.001, 6.0)
    } else {
        (5.0, 0.5)
    };
    let token_id = position as u32;
    let mean_abs_diff = max_abs_diff * 0.5;
    let rmse = max_abs_diff * 0.3;

    SpcMetrics {
        position,
        token_id,
        cpu_argmax,
        gpu_argmax,
        _cpu_top_logit: 1.0,
        _gpu_top_logit: 1.0,
        max_abs_diff,
        _max_diff_idx: 0,
        mean_abs_diff,
        rmse,
        cosine_similarity,
        kl_divergence,
        sigma_level,
        cpu_nan,
        gpu_nan,
        out_of_spec_count: 0,
        vocab_size: 32000,
    }
}
/// Smoke test: three positions with cosine 1.0, zero diff, matching argmax and
/// no NaNs are handed to `auto_diagnose`; the test passes if it does not panic.
#[test]
fn test_auto_diagnose_no_failures() {
    let all_matching: Vec<_> = (0..3)
        .map(|position| diagnose_metrics(1.0, 0.0, 42, 42, 0, 0, position))
        .collect();
    auto_diagnose(&all_matching, 3584, 28, 4);
}
/// Smoke test: `auto_diagnose` is called with an empty metrics slice; the test
/// passes if it does not panic.
#[test]
fn test_auto_diagnose_empty_metrics() {
    let no_metrics: &[SpcMetrics] = &[];
    auto_diagnose(no_metrics, 3584, 28, 4);
}
/// Smoke test: positions 0, 1 and 2 all carry cosine 0.5, max diff 10.0 and a
/// CPU/GPU argmax mismatch (42 vs 100), so the mismatch is already present at
/// position 0. Passes if `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_pos0_catastrophic() {
    let broken_from_start: Vec<_> = (0..3)
        .map(|position| diagnose_metrics(0.5, 10.0, 42, 100, 0, 0, position))
        .collect();
    auto_diagnose(&broken_from_start, 3584, 28, 4);
}
/// Smoke test: six consecutive positions whose cosine similarity falls and
/// whose max diff rises from one position to the next, with the GPU argmax
/// leaving the CPU argmax (42) from position 2 onwards. Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_growing_divergence() {
    // One row per position: (cosine_similarity, max_abs_diff, gpu_argmax).
    let drift: [(f32, f32, u32); 6] = [
        (0.9999, 0.1, 42),
        (0.998, 0.5, 42),
        (0.995, 1.5, 43),
        (0.990, 3.0, 50),
        (0.980, 6.0, 100),
        (0.950, 12.0, 200),
    ];
    let drifting: Vec<_> = drift
        .iter()
        .copied()
        .enumerate()
        .map(|(position, (cosine, max_diff, gpu_argmax))| {
            diagnose_metrics(cosine, max_diff, 42, gpu_argmax, 0, 0, position)
        })
        .collect();
    auto_diagnose(&drifting, 4096, 32, 8);
}
/// Smoke test: position 0 agrees on the argmax, while position 1 still has a
/// cosine of 0.995 but a different GPU argmax (43 vs 42). Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_high_cos_wrong_argmax() {
    let agreeing = diagnose_metrics(0.9999, 0.1, 42, 42, 0, 0, 0);
    let argmax_flipped = diagnose_metrics(0.995, 0.5, 42, 43, 0, 0, 1);
    let samples = vec![agreeing, argmax_flipped];
    auto_diagnose(&samples, 2048, 16, 4);
}
/// Smoke test: four positions with identical failing values (cosine 0.5,
/// max diff 10.0, argmax 42 vs 100). Passes if `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_all_fail_uniformly() {
    let uniformly_failing: Vec<_> = (0..4)
        .map(|position| diagnose_metrics(0.5, 10.0, 42, 100, 0, 0, position))
        .collect();
    auto_diagnose(&uniformly_failing, 1536, 12, 4);
}
/// Smoke test: otherwise-perfect metrics that report NaNs, three on the CPU
/// side at position 0 and five on the GPU side at position 1. Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_nan_failures() {
    let cpu_side_nans = diagnose_metrics(1.0, 0.0, 42, 42, 3, 0, 0);
    let gpu_side_nans = diagnose_metrics(1.0, 0.0, 42, 42, 0, 5, 1);
    let samples = vec![cpu_side_nans, gpu_side_nans];
    auto_diagnose(&samples, 3584, 28, 4);
}
/// Smoke test: a single failing position is diagnosed with `0` as the last
/// argument (the KV-head count, going by the test name). Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_zero_kv_heads() {
    let failing = diagnose_metrics(0.5, 10.0, 42, 100, 0, 0, 0);
    // View the one fixture as a one-element slice.
    auto_diagnose(std::slice::from_ref(&failing), 3584, 28, 0);
}
/// Smoke test: a single failing position is diagnosed with `0` for both of the
/// last two arguments (the head counts, going by the test name). Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_zero_num_heads() {
    let failing = diagnose_metrics(0.5, 10.0, 42, 100, 0, 0, 0);
    // View the one fixture as a one-element slice.
    auto_diagnose(std::slice::from_ref(&failing), 3584, 0, 0);
}
/// Smoke test: positions 0, 1 and 3 match perfectly while position 2 has a
/// cosine of 0.995 and a differing GPU argmax (43 vs 42). Passes if
/// `auto_diagnose` does not panic.
#[test]
fn test_auto_diagnose_mixed_pass_fail() {
    // Perfectly matching fixture for a given position.
    let clean = |position| diagnose_metrics(1.0, 0.0, 42, 42, 0, 0, position);
    let samples = vec![
        clean(0),
        clean(1),
        diagnose_metrics(0.995, 0.5, 42, 43, 0, 0, 2),
        clean(3),
    ];
    auto_diagnose(&samples, 4096, 32, 8);
}
/// Smoke test: calls `symbol()` on each of the six `Verdict` variants listed
/// below, in order; the returned values are discarded. Passes if no call
/// panics.
#[test]
fn test_verdict_symbol_all_variants() {
    let variants = [
        Verdict::Pass,
        Verdict::WarnArgmax,
        Verdict::WarnOutOfSpec,
        Verdict::FailDivergent,
        Verdict::FailCatastrophic,
        Verdict::FailNan,
    ];
    for verdict in variants {
        let _ = verdict.symbol();
    }
}
/// Smoke test: runs `format_diff` on four sample magnitudes and discards the
/// results. Passes if no call panics.
#[test]
fn test_format_diff_ranges() {
    // Samples in call order: tiny, medium, high, very high.
    for diff in [0.05, 0.5, 2.5, 10.0] {
        let _ = format_diff(diff);
    }
}
/// Smoke test: runs `format_cosine` on five sample similarities and discards
/// the results. Passes if no call panics.
#[test]
fn test_format_cosine_ranges() {
    // Samples in call order: perfect, good, warning, bad, catastrophic.
    for cosine in [0.99999, 0.9995, 0.995, 0.95, 0.5] {
        let _ = format_cosine(cosine);
    }
}
/// Smoke test: calls `print_header` once; passes as long as the call does not
/// panic. Nothing about the printed output is asserted.
#[test]
fn test_print_header_no_panic() {
print_header();
}
/// Smoke test: calls `print_footer` once; passes as long as the call does not
/// panic. Nothing about the printed output is asserted.
#[test]
fn test_print_footer_no_panic() {
print_footer();
}
/// Smoke test: prints one row whose CPU and GPU results agree (same argmax,
/// tiny differences, no NaNs, nothing out of spec). Passes if `print_row`
/// does not panic; the output itself is not checked.
#[test]
fn test_print_row_pass() {
    print_row(&SpcMetrics {
        // Which token this row describes.
        position: 0,
        token_id: 42,
        vocab_size: 32000,
        // Top-1 prediction on each side.
        cpu_argmax: 100,
        gpu_argmax: 100,
        _cpu_top_logit: 5.0,
        _gpu_top_logit: 5.0,
        // Element-wise difference statistics.
        max_abs_diff: 0.001,
        _max_diff_idx: 0,
        mean_abs_diff: 0.0005,
        rmse: 0.0003,
        // Distribution-level agreement.
        cosine_similarity: 0.99999,
        kl_divergence: 0.0001,
        sigma_level: 6.5,
        // Anomaly counters.
        cpu_nan: 0,
        gpu_nan: 0,
        out_of_spec_count: 0,
    });
}
/// Smoke test: prints one row whose CPU and GPU results disagree (different
/// argmax, cosine 0.5, large differences, 1000 out-of-spec entries). Passes
/// if `print_row` does not panic; the output itself is not checked.
#[test]
fn test_print_row_fail() {
    print_row(&SpcMetrics {
        // Which token this row describes.
        position: 5,
        token_id: 999,
        vocab_size: 32000,
        // Top-1 prediction on each side.
        cpu_argmax: 100,
        gpu_argmax: 200,
        _cpu_top_logit: 5.0,
        _gpu_top_logit: -3.0,
        // Element-wise difference statistics.
        max_abs_diff: 10.0,
        _max_diff_idx: 42,
        mean_abs_diff: 5.0,
        rmse: 3.0,
        // Distribution-level agreement.
        cosine_similarity: 0.5,
        kl_divergence: 5.0,
        sigma_level: 0.5,
        // Anomaly counters.
        cpu_nan: 0,
        gpu_nan: 0,
        out_of_spec_count: 1000,
    });
}
/// Smoke test: prints one row that is numerically perfect except for five NaNs
/// reported on the CPU side (per the test name, this is meant to exercise the
/// NaN verdict). Passes if `print_row` does not panic; the output itself is
/// not checked.
#[test]
fn test_print_row_nan_verdict() {
    print_row(&SpcMetrics {
        // Which token this row describes.
        position: 0,
        token_id: 1,
        vocab_size: 32000,
        // Top-1 prediction on each side.
        cpu_argmax: 0,
        gpu_argmax: 0,
        _cpu_top_logit: 0.0,
        _gpu_top_logit: 0.0,
        // Element-wise difference statistics.
        max_abs_diff: 0.0,
        _max_diff_idx: 0,
        mean_abs_diff: 0.0,
        rmse: 0.0,
        // Distribution-level agreement.
        cosine_similarity: 1.0,
        kl_divergence: 0.0,
        sigma_level: 99.0,
        // Anomaly counters: only the CPU side reports NaNs.
        cpu_nan: 5,
        gpu_nan: 0,
        out_of_spec_count: 0,
    });
}
}