ferrum_testkit/op_diff/
mod.rs1pub mod gemm;
31pub mod marlin_matmul; pub mod paged_varlen_attn; pub mod qk_norm_rope;
34pub mod rms_norm;
35pub mod silu_mul;
36
/// NMSE threshold of `1e-7`.
///
/// NOTE(review): going by the name, this is the bound intended for fp32
/// backend outputs — confirm against the call sites.
pub const NMSE_FP32_TOL: f64 = 1e-7;

/// NMSE threshold of `1e-6`, ten times looser than [`NMSE_FP32_TOL`].
///
/// NOTE(review): going by the name, this is the bound intended for fp16
/// backend outputs — confirm against the call sites.
pub const NMSE_FP16_TOL: f64 = 1e-6;
41
/// Normalized mean squared error of `b` measured against the reference `a`.
///
/// Computes `mean((a - b)^2) / mean(a^2)`, accumulating in `f64`.
///
/// Special cases:
/// * Empty inputs yield `0.0`.
/// * When `mean(a^2)` is below `1e-30` (e.g. an all-zero reference) the
///   normalization would divide by (almost) zero, so the plain,
///   un-normalized mean squared error is returned instead.
///
/// # Panics
///
/// Panics with `"nmse: length mismatch"` if `a` and `b` differ in length.
#[must_use]
pub fn nmse(a: &[f32], b: &[f32]) -> f64 {
    assert_eq!(a.len(), b.len(), "nmse: length mismatch");
    if a.is_empty() {
        return 0.0;
    }

    // One traversal accumulates both the squared error and the squared
    // reference magnitude; each accumulator still sums in element order.
    let (err_sum, ref_sum) = a.iter().zip(b.iter()).fold(
        (0.0_f64, 0.0_f64),
        |(err_acc, ref_acc), (&x, &y)| {
            let x = f64::from(x);
            let diff = x - f64::from(y);
            (err_acc + diff * diff, ref_acc + x * x)
        },
    );

    let n = a.len() as f64;
    let mse_ab = err_sum / n;
    let mse_a0 = ref_sum / n;

    // Reference carries (numerically) no energy: fall back to absolute MSE.
    if mse_a0 < 1e-30 {
        return mse_ab;
    }
    mse_ab / mse_a0
}
78
/// Result buffer produced by one backend run of an op: a flat `Vec<f32>`.
pub type Output = Vec<f32>;
82
83pub trait OpUnderTest {
85 fn name(&self) -> &str;
87
88 fn run_cpu(&self, seed: u64) -> Output;
90
91 #[cfg(all(target_os = "macos", feature = "metal"))]
93 fn run_metal(&self, seed: u64) -> Output;
94
95 #[cfg(feature = "cuda")]
97 fn run_cuda(&self, seed: u64) -> Output;
98}
99
/// Outcome of comparing one op's backend outputs against its CPU output.
#[derive(Debug)]
pub struct NmseReport {
    /// Name of the op that was run.
    pub op: String,
    /// Seed the op was run with.
    pub seed: u64,
    /// Output of the CPU run, used as the reference for the scores below.
    pub cpu: Vec<f32>,
    /// NMSE of the Metal output against `cpu`; `None` when Metal was not run.
    pub metal_nmse: Option<f64>,
    /// NMSE of the CUDA output against `cpu`; `None` when CUDA was not run.
    pub cuda_nmse: Option<f64>,
}

impl NmseReport {
    /// Returns `true` when every backend score that is present is strictly
    /// below `tol`.
    ///
    /// Absent scores (`None`) are ignored, so a report with no backend
    /// scores at all passes. A `NaN` score never passes, because the
    /// `< tol` comparison is false for `NaN`.
    #[must_use]
    pub fn within_tol(&self, tol: f64) -> bool {
        [self.metal_nmse, self.cuda_nmse]
            .iter()
            .flatten()
            .all(|&score| score < tol)
    }
}
119
120pub fn compare_backends(op: &dyn OpUnderTest, seed: u64) -> NmseReport {
123 let cpu = op.run_cpu(seed);
124 let metal_nmse = run_metal_nmse(op, &cpu, seed);
125 let cuda_nmse = run_cuda_nmse(op, &cpu, seed);
126 NmseReport {
127 op: op.name().to_string(),
128 seed,
129 cpu,
130 metal_nmse,
131 cuda_nmse,
132 }
133}
134
135#[cfg(all(target_os = "macos", feature = "metal"))]
136fn run_metal_nmse(op: &dyn OpUnderTest, cpu: &[f32], seed: u64) -> Option<f64> {
137 Some(nmse(cpu, &op.run_metal(seed)))
138}
139
140#[cfg(not(all(target_os = "macos", feature = "metal")))]
141fn run_metal_nmse(_op: &dyn OpUnderTest, _cpu: &[f32], _seed: u64) -> Option<f64> {
142 None
143}
144
145#[cfg(feature = "cuda")]
146fn run_cuda_nmse(op: &dyn OpUnderTest, cpu: &[f32], seed: u64) -> Option<f64> {
147 Some(nmse(cpu, &op.run_cuda(seed)))
148}
149
150#[cfg(not(feature = "cuda"))]
151fn run_cuda_nmse(_op: &dyn OpUnderTest, _cpu: &[f32], _seed: u64) -> Option<f64> {
152 None
153}
154
155pub fn random_vec(n: usize, lo: f32, hi: f32, seed: u64) -> Vec<f32> {
157 use rand::{Rng, SeedableRng};
158 let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
159 (0..n).map(|_| rng.random_range(lo..hi)).collect()
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Comparing a buffer with itself must give (numerically) zero error.
    #[test]
    fn nmse_identical_is_zero() {
        let a = vec![1.0, 2.0, 3.0];
        assert!(nmse(&a, &a) < 1e-30);
    }

    /// Scaling the reference by 1.01 must give an NMSE of about 1e-4.
    #[test]
    fn nmse_scaled_b_proportional() {
        let a = vec![1.0, 2.0, 3.0, 4.0];
        // b = 1.01 * a, so the error is 0.01 * a and NMSE = 0.01^2 = 1e-4.
        let b: Vec<f32> = a.iter().map(|x| x * 1.01).collect();
        let n = nmse(&a, &b);
        assert!((n - 1e-4).abs() < 1e-5);
    }

    /// With an all-zero reference `nmse` returns the un-normalized MSE.
    #[test]
    fn nmse_zero_reference_falls_back() {
        let a = vec![0.0, 0.0, 0.0];
        // Plain MSE of a constant 0.1 error is 0.1^2 = 0.01.
        let b = vec![0.1, 0.1, 0.1];
        let n = nmse(&a, &b);
        assert!((n - 0.01).abs() < 1e-9);
    }

    /// The same seed must reproduce the same vector.
    #[test]
    fn random_vec_determinism() {
        let a = random_vec(100, -1.0, 1.0, 42);
        let b = random_vec(100, -1.0, 1.0, 42);
        assert_eq!(a, b);
    }
}