use std::collections::HashMap;
use std::fmt;
/// Summary statistics captured for one named tensor at one point in a trace.
///
/// All statistics are computed once by [`TraceEvent::new`]. The raw values are
/// kept only when verbose capture was requested.
#[derive(Debug, Clone)]
pub struct TraceEvent {
    /// Caller-supplied label of the traced tensor.
    pub name: String,
    /// Caller-supplied position, stored verbatim.
    pub position: usize,
    /// Euclidean (L2) norm of the tensor, accumulated in `f32`.
    pub l2_norm: f32,
    /// Arithmetic mean of the elements; `0.0` for an empty tensor.
    pub mean: f32,
    /// Smallest element; `f32::INFINITY` for an empty tensor.
    pub min: f32,
    /// Largest element; `f32::NEG_INFINITY` for an empty tensor.
    pub max: f32,
    /// First eight elements, zero-padded when the tensor is shorter.
    pub head: [f32; 8],
    /// Full copy of the tensor; `Some` only for events created with `verbose = true`.
    pub full_data: Option<Vec<f32>>,
    /// Number of elements in the tensor.
    pub len: usize,
}

impl TraceEvent {
    /// Builds an event by summarising `tensor`.
    ///
    /// * `name` - label stored with the event.
    /// * `tensor` - values to summarise; may be empty.
    /// * `position` - stored unchanged in [`TraceEvent::position`].
    /// * `verbose` - when `true`, a full copy of `tensor` is kept in
    ///   [`TraceEvent::full_data`] (required by [`TraceEvent::cosine_similarity`]).
    pub fn new(name: &str, tensor: &[f32], position: usize, verbose: bool) -> Self {
        let len = tensor.len();
        let l2_norm = tensor.iter().map(|x| x * x).sum::<f32>().sqrt();
        // Guard the division so an empty tensor yields 0.0 instead of NaN.
        let mean = if len > 0 {
            tensor.iter().sum::<f32>() / len as f32
        } else {
            0.0
        };
        // The fold identities double as the values reported for an empty tensor.
        let min = tensor.iter().copied().fold(f32::INFINITY, f32::min);
        let max = tensor.iter().copied().fold(f32::NEG_INFINITY, f32::max);

        // Copy at most eight leading values; the remainder stays zero.
        let mut head = [0.0f32; 8];
        let head_len = len.min(head.len());
        head[..head_len].copy_from_slice(&tensor[..head_len]);

        Self {
            name: name.to_string(),
            position,
            l2_norm,
            mean,
            min,
            max,
            head,
            full_data: verbose.then(|| tensor.to_vec()),
            len,
        }
    }

    /// Returns `true` when [`TraceEvent::relative_diff`] to `other` is at most
    /// `tolerance`.
    ///
    /// Only the L2 norms are compared. A NaN norm on either side makes the
    /// result `false`.
    pub fn approx_eq(&self, other: &Self, tolerance: f32) -> bool {
        // Delegate so the tolerance check and the reported diff can never drift apart.
        self.relative_diff(other) <= tolerance
    }

    /// Difference between the two L2 norms, relative to `self`'s norm.
    ///
    /// When `self`'s norm is not above `1e-10` the absolute difference is
    /// returned instead, avoiding division by (near) zero. The measure is
    /// therefore not symmetric in `self` and `other`.
    pub fn relative_diff(&self, other: &Self) -> f32 {
        const NORM_EPSILON: f32 = 1e-10;

        let l2_diff = (self.l2_norm - other.l2_norm).abs();
        let reference = self.l2_norm.abs();
        if reference > NORM_EPSILON {
            l2_diff / reference
        } else {
            l2_diff
        }
    }

    /// Cosine similarity between the full tensors of two events.
    ///
    /// Returns `None` when either event has no `full_data`, when the lengths
    /// differ, when the tensors are empty, or when the product of the two
    /// norms is below `1e-30`. Accumulation is done in `f64` and the result is
    /// narrowed to `f32`. Non-finite inputs are not filtered, so the result
    /// can be `Some(NaN)`.
    pub fn cosine_similarity(&self, other: &Self) -> Option<f32> {
        const MIN_DENOMINATOR: f64 = 1e-30;

        let lhs = self.full_data.as_ref()?;
        let rhs = other.full_data.as_ref()?;
        if lhs.is_empty() || lhs.len() != rhs.len() {
            return None;
        }

        let mut dot = 0.0f64;
        let mut lhs_sq = 0.0f64;
        let mut rhs_sq = 0.0f64;
        for (&x, &y) in lhs.iter().zip(rhs.iter()) {
            let (x, y) = (f64::from(x), f64::from(y));
            dot += x * y;
            lhs_sq += x * x;
            rhs_sq += y * y;
        }

        let denom = lhs_sq.sqrt() * rhs_sq.sqrt();
        if denom < MIN_DENOMINATOR {
            return None;
        }
        Some((dot / denom) as f32)
    }
}
/// One-line human-readable summary of a [`TraceEvent`].
///
/// Prints the name, position, L2 norm, mean, min/max range and length,
/// followed by the first four entries of `head`.
impl fmt::Display for TraceEvent {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            name,
            position,
            l2_norm,
            mean,
            min,
            max,
            head,
            len,
            ..
        } = self;
        // Only the first four of the eight stored head values are shown.
        let [h0, h1, h2, h3, ..] = *head;
        write!(
            f,
            "{} [pos={}]: L2={:.6}, mean={:.6}, range=[{:.6}, {:.6}], len={}, head=[{:.4}, {:.4}, {:.4}, {:.4}...]",
            name, position, l2_norm, mean, min, max, len, h0, h1, h2, h3,
        )
    }
}
/// A single CPU-vs-GPU mismatch for one named trace point.
#[derive(Debug, Clone)]
pub struct TraceDiff {
    /// Label of the trace point that differs.
    pub name: String,
    /// Position value associated with the trace point.
    pub position: usize,
    /// L2 norm recorded on the CPU side.
    pub cpu_l2: f32,
    /// L2 norm recorded on the GPU side.
    pub gpu_l2: f32,
    /// Relative difference as a fraction; rendered as a percentage (x100) when displayed.
    pub relative_diff: f32,
    /// Leading values from the CPU side.
    pub cpu_head: [f32; 8],
    /// Leading values from the GPU side.
    pub gpu_head: [f32; 8],
}

/// Three-line report: the norms with the percentage difference, then the
/// first four head values of each side.
impl fmt::Display for TraceDiff {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let percent = self.relative_diff * 100.0;
        // Only the first four of the eight stored values are printed per side.
        let [c0, c1, c2, c3, ..] = self.cpu_head;
        let [g0, g1, g2, g3, ..] = self.gpu_head;
        write!(
            f,
            "{} [pos={}]: CPU L2={:.6} vs GPU L2={:.6} (diff={:.2}%)\n CPU head: [{:.4}, {:.4}, {:.4}, {:.4}...]\n GPU head: [{:.4}, {:.4}, {:.4}, {:.4}...]",
            self.name,
            self.position,
            self.cpu_l2,
            self.gpu_l2,
            percent,
            c0, c1, c2, c3,
            g0, g1, g2, g3,
        )
    }
}
/// Outcome of comparing two traces: the list of mismatches plus the tolerance
/// associated with the comparison.
#[derive(Debug, Clone)]
pub struct TraceComparison {
    /// Recorded mismatches; an empty list means the traces are considered equivalent.
    pub diffs: Vec<TraceDiff>,
    /// Tolerance stored with the comparison, as a fraction.
    pub tolerance: f32,
}

impl TraceComparison {
    /// Returns the first recorded mismatch, or `None` when there are none.
    pub fn first_divergence(&self) -> Option<&TraceDiff> {
        self.diffs.first()
    }

    /// Returns `true` when no mismatch was recorded.
    pub fn is_equivalent(&self) -> bool {
        self.first_divergence().is_none()
    }

    /// One-line description: either a "no divergence" message or the first
    /// mismatch (name, position, percentage) with the total mismatch count.
    pub fn summary(&self) -> String {
        match self.first_divergence() {
            None => "No divergence detected".to_string(),
            Some(first) => {
                let total = self.diffs.len();
                let percent = first.relative_diff * 100.0;
                format!(
                    "First divergence at '{}' (pos={}): {:.2}% L2 diff ({} total divergences)",
                    first.name, first.position, percent, total
                )
            }
        }
    }
}
use crate::brick::profiler::ContractSeverity;
impl TraceComparison {
pub fn validate_parity_contracts(
cpu: &BrickTracer,
gpu: &BrickTracer,
) -> Vec<(ContractSeverity, String)> {
let mut violations = Vec::new();
const COSINE_THRESHOLD: f32 = 0.99;
for cpu_event in cpu.events() {
if let Some(gpu_event) = gpu.get(&cpu_event.name) {
if let Some(sim) = cpu_event.cosine_similarity(gpu_event) {
if sim < COSINE_THRESHOLD {
violations.push((
ContractSeverity::Error,
format!(
"layer-parity-v1 COSINE_PARITY: '{}' cosine_sim={:.4} < {:.2} threshold. \
CPU L2={:.6}, GPU L2={:.6}. Graph replay or quantization divergence.",
cpu_event.name, sim, COSINE_THRESHOLD,
cpu_event.l2_norm, gpu_event.l2_norm
),
));
}
}
}
}
violations
}
}
/// Multi-line report: header, tolerance (as a percentage), mismatch count and
/// verdict; on divergence the first mismatch is printed in full and, when
/// there is more than one, every mismatch is listed with its percentage.
impl fmt::Display for TraceComparison {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let total = self.diffs.len();
        writeln!(f, "=== TRACE COMPARISON ===")?;
        writeln!(f, "Tolerance: {:.4}%", self.tolerance * 100.0)?;
        writeln!(f, "Divergences: {}", total)?;

        // No mismatches: the verdict line ends the report.
        let first = match self.diffs.first() {
            Some(first) => first,
            None => return writeln!(f, "Result: MATCH"),
        };

        writeln!(f, "Result: DIVERGENCE DETECTED")?;
        writeln!(f, "\n--- First Divergence ---")?;
        writeln!(f, "{first}")?;

        // The full listing would only repeat the first entry when there is a single mismatch.
        if total > 1 {
            writeln!(f, "\n--- All Divergences ---")?;
            self.diffs.iter().try_for_each(|diff| {
                writeln!(
                    f,
                    " {}: {:.2}% diff",
                    diff.name,
                    diff.relative_diff * 100.0
                )
            })?;
        }
        Ok(())
    }
}
/// Collector of [`TraceEvent`]s.
///
/// Only the data layout and the `Default` impl live in this section; the
/// methods (including `new`, and the `events`/`get` accessors used by
/// `TraceComparison::validate_parity_contracts`) are defined outside it.
#[derive(Debug, Clone)]
pub struct BrickTracer {
/// Recorded events. NOTE(review): presumably kept in recording order — confirm in the included impl.
events: Vec<TraceEvent>,
/// NOTE(review): presumably the position assigned to newly recorded events — confirm in the included impl.
position: usize,
/// NOTE(review): presumably forwarded as the `verbose` flag of `TraceEvent::new`
/// (i.e. whether full tensor data is retained) — confirm in the included impl.
verbose: bool,
/// NOTE(review): presumably maps an event name to its index in `events` for
/// name-based lookup — confirm in the included impl.
index: HashMap<String, usize>,
}
/// The default tracer is whatever `BrickTracer::new()` constructs.
impl Default for BrickTracer {
fn default() -> Self {
Self::new()
}
}
include!("tracer_impl.rs");
include!("tracer_trace_event.rs");