Skip to main content

trueno/brick/profiler/
checksum.rs

//! Kernel checksum types for divergence detection.
//!
//! CORRECTNESS-011: Captures output checksum per kernel invocation.
4
5use std::fmt;
6
/// Checksum record for a single kernel invocation, used for divergence detection.
///
/// CORRECTNESS-011: Captures output checksum per kernel invocation.
///
/// Two records compare equal only when every field matches, so a reference
/// record and a test record can be compared directly with `==` or used as
/// keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct KernelChecksum {
    /// Kernel/brick name
    pub name: String,
    /// Layer index
    pub layer_idx: usize,
    /// Sequence position
    pub position: u32,
    /// FNV-1a checksum of first 64 output floats
    pub checksum: u64,
}
21
/// Information about a detected divergence between CPU and GPU.
///
/// Carries the identity of the kernel invocation (name, layer, position)
/// together with both checksums so the mismatch can be reported or compared
/// against other reports.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DivergenceInfo {
    /// Name of the divergent kernel
    pub kernel_name: String,
    /// Layer where divergence occurred
    pub layer_idx: usize,
    /// Position where divergence occurred
    pub position: u32,
    /// Expected checksum (from CPU/reference)
    pub expected_checksum: u64,
    /// Actual checksum (from GPU/test)
    pub actual_checksum: u64,
}
36
37impl fmt::Display for DivergenceInfo {
38    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39        write!(
40            f,
41            "DIVERGENCE at '{}' (layer {}, pos {}): expected 0x{:016X}, got 0x{:016X}",
42            self.kernel_name,
43            self.layer_idx,
44            self.position,
45            self.expected_checksum,
46            self.actual_checksum
47        )
48    }
49}
50
/// FNV-1a hash of f32 slice (first 64 elements for efficiency).
///
/// Used for quick divergence detection between CPU and GPU outputs.
///
/// Each float is hashed through its little-endian byte representation, so the
/// result does not depend on host endianness. An empty slice yields the FNV
/// offset basis.
///
/// # Limitations
///
/// * Only the first 64 elements contribute; differences further into `data`
///   are not detected.
/// * The comparison is bitwise: `0.0` and `-0.0` hash differently, as do NaN
///   values with different bit patterns.
#[inline]
#[must_use]
pub fn fnv1a_f32_checksum(data: &[f32]) -> u64 {
    const FNV_OFFSET: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;
    /// Number of leading elements folded into the checksum.
    const CHECKSUM_WINDOW: usize = 64;

    data.iter()
        .take(CHECKSUM_WINDOW)
        .flat_map(|val| val.to_le_bytes())
        // FNV-1a step: XOR the byte in first, then multiply by the prime.
        .fold(FNV_OFFSET, |hash, byte| {
            (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
70
/// Macro for convenient brick timing with automatic sync.
///
/// Expands to: `$profiler.start($name)`, then the body, then
/// `$profiler.stop(timer, $elements)`, and evaluates to the body's value.
///
/// # Usage
///
/// ```rust,ignore
/// time_brick!(profiler, "RmsNorm", 1, {
///     rmsnorm_kernel.launch();
///     stream.synchronize(); // REQUIRED for GPU
/// });
/// ```
///
/// # Caveats
///
/// * The `$profiler` expression appears twice in the expansion (once for
///   `start`, once for `stop`), so it is evaluated twice. Pass a plain
///   variable or place expression rather than a call with side effects.
/// * If the body leaves early (`return`, `?`, `break`, or a panic), `stop`
///   is not reached and the timer is never closed by this macro.
#[macro_export]
macro_rules! time_brick {
    ($profiler:expr, $name:expr, $elements:expr, $body:block) => {{
        let brick_timer = $profiler.start($name);
        let brick_output = $body;
        $profiler.stop(brick_timer, $elements);
        brick_output
    }};
}