Skip to main content

provable_contracts/
roofline.rs

//! Runtime roofline model derived from contract YAML.
//!
//! Reads `roofline-model-v1.yaml` and provides performance ceiling
//! calculations. Consumer crates use this instead of hardcoded formulas.
//! Section 24: Deep Stack Integration.
6
7use std::path::Path;
8
9/// Performance ceilings derived from the roofline contract.
10#[derive(Debug, Clone)]
11pub struct RooflineCeiling {
12    /// Model size in bytes
13    pub model_bytes: f64,
14    /// Bandwidth-limited ceiling (tokens/sec)
15    pub bw_ceiling: f64,
16    /// Compute-limited ceiling (tokens/sec)
17    pub compute_ceiling: f64,
18    /// Effective ceiling: min(bw, compute)
19    pub throughput_ceiling: f64,
20    /// Whether bandwidth-bound or compute-bound
21    pub bottleneck: Bottleneck,
22    /// Source contract ID
23    pub contract_id: String,
24}
25
/// The resource that limits throughput: memory bandwidth or compute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Bottleneck {
    /// The bandwidth ceiling is strictly the lower of the two.
    Bandwidth,
    /// The compute ceiling is the lower of the two, or both are equal.
    Compute,
}
32
/// Hardware characteristics fed into the roofline calculation.
#[derive(Debug, Clone)]
pub struct HardwareProfile {
    /// Effective (sustained) memory bandwidth, in GB/s.
    pub bandwidth_gb_s: f64,
    /// Effective compute throughput, in GFLOPS.
    pub compute_gflops: f64,
    /// Operations per token, expressed per model parameter:
    /// [`compute_roofline`] multiplies this by the parameter count to get
    /// the total work per token. Depends on the model architecture.
    pub ops_per_token: f64,
}
43
44impl HardwareProfile {
45    /// Create a profile for Apple M-series (conservative estimates).
46    pub fn apple_m_series() -> Self {
47        Self {
48            bandwidth_gb_s: 100.0,  // M1 Pro ~200, M2 Ultra ~800, use conservative
49            compute_gflops: 1000.0, // GPU TFLOPS varies; conservative
50            ops_per_token: 2.0,     // 2 FLOPs per parameter per token (forward pass)
51        }
52    }
53
54    /// Create a profile for NVIDIA A100.
55    pub fn nvidia_a100() -> Self {
56        Self {
57            bandwidth_gb_s: 2039.0,  // HBM2e bandwidth
58            compute_gflops: 19500.0, // FP16 tensor core
59            ops_per_token: 2.0,
60        }
61    }
62}
63
64/// Compute roofline ceilings from contract equations + hardware profile.
65///
66/// Implements the 4 equations from `roofline-model-v1.yaml`:
67/// 1. `model_bytes = total_params × bits_per_weight / 8`
68/// 2. `bw_ceiling = effective_bandwidth_GB_s / (model_bytes / 1e9)`
69/// 3. `compute_ceiling = effective_GFLOPS / ops_per_token`
70/// 4. `throughput <= min(bw_ceiling, compute_ceiling)`
71pub fn compute_roofline(
72    total_params: u64,
73    bits_per_weight: u32,
74    hw: &HardwareProfile,
75) -> RooflineCeiling {
76    // Equation 1: model_bytes
77    #[allow(clippy::cast_precision_loss)]
78    // model params fit within f64 mantissa for all real models
79    let total_params_f = total_params as f64;
80    let model_bytes = total_params_f * f64::from(bits_per_weight) / 8.0;
81
82    // Equation 2: bw_ceiling (tokens/sec)
83    let model_gb = model_bytes / 1e9;
84    let bw_ceiling = if model_gb > 0.0 {
85        hw.bandwidth_gb_s / model_gb
86    } else {
87        f64::INFINITY
88    };
89
90    // Equation 3: compute_ceiling (tokens/sec)
91    let compute_ceiling = if hw.ops_per_token > 0.0 {
92        hw.compute_gflops * 1e9 / (total_params_f * hw.ops_per_token)
93    } else {
94        f64::INFINITY
95    };
96
97    // Equation 4: throughput <= min(bw_ceiling, compute_ceiling)
98    let throughput_ceiling = bw_ceiling.min(compute_ceiling);
99    let bottleneck = if bw_ceiling < compute_ceiling {
100        Bottleneck::Bandwidth
101    } else {
102        Bottleneck::Compute
103    };
104
105    RooflineCeiling {
106        model_bytes,
107        bw_ceiling,
108        compute_ceiling,
109        throughput_ceiling,
110        bottleneck,
111        contract_id: "roofline-model-v1".to_string(),
112    }
113}
114
/// Try to load the roofline contract from a standard path.
///
/// Reads `roofline-model-v1.yaml` inside `contracts_dir` and returns the
/// value of the first line whose trimmed text starts with `description:`.
/// Indentation is ignored, so a nested `description:` key matches too. The
/// value is trimmed and has its surrounding quotes removed.
///
/// This is a line scan, not a YAML parse: block scalars, trailing comments
/// and escape sequences are returned as written.
///
/// # Returns
///
/// `Some(description)` when such a line is found; `None` when the file
/// cannot be read as UTF-8 text (including when it does not exist) or
/// contains no `description:` line.
pub fn load_roofline_contract(contracts_dir: &Path) -> Option<String> {
    let path = contracts_dir.join("roofline-model-v1.yaml");
    // Read directly rather than probing with `exists()` first: a missing file
    // already surfaces as a read error, and skipping the probe removes the
    // window in which the file could vanish between check and read.
    let content = std::fs::read_to_string(&path).ok()?;
    content
        .lines()
        .find_map(|line| line.trim().strip_prefix("description:"))
        .map(|raw| strip_scalar_quotes(raw.trim()).to_string())
}

/// Remove the quotes around a scalar value: one matched pair of single
/// quotes, or otherwise any leading/trailing double quotes.
fn strip_scalar_quotes(value: &str) -> &str {
    value
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''))
        .unwrap_or_else(|| value.trim_matches('"'))
}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a profile with the given bandwidth and compute figures and the
    /// `ops_per_token` of 2.0 that every hand-built case here uses.
    fn hardware(bandwidth_gb_s: f64, compute_gflops: f64) -> HardwareProfile {
        HardwareProfile {
            bandwidth_gb_s,
            compute_gflops,
            ops_per_token: 2.0,
        }
    }

    /// 7B parameters at 4 bits on hypothetical hardware: 3.5 GB of weights,
    /// positive ceilings, and a bandwidth-bound verdict.
    #[test]
    fn roofline_7b_q4() {
        let ceiling = compute_roofline(7_000_000_000, 4, &hardware(200.0, 5000.0));

        // 7B * 4 / 8 = 3.5GB
        let expected_bytes = 3_500_000_000.0;
        assert!((ceiling.model_bytes - expected_bytes).abs() < 1.0);

        assert!(ceiling.bw_ceiling > 0.0);
        assert!(ceiling.compute_ceiling > 0.0);

        let tighter = ceiling.bw_ceiling.min(ceiling.compute_ceiling);
        assert!((ceiling.throughput_ceiling - tighter).abs() < f64::EPSILON);

        // Memory-bound at 4-bit
        assert_eq!(ceiling.bottleneck, Bottleneck::Bandwidth);
    }

    /// The result is always tagged with the roofline contract identifier.
    #[test]
    fn roofline_contract_id() {
        let ceiling = compute_roofline(1_000_000, 16, &HardwareProfile::apple_m_series());
        assert_eq!(ceiling.contract_id, "roofline-model-v1");
    }

    /// A tiny model with high bandwidth but very slow compute is compute bound.
    #[test]
    fn bottleneck_classification() {
        let slow_compute = hardware(1000.0, 1.0);
        let ceiling = compute_roofline(1000, 32, &slow_compute);
        assert_eq!(ceiling.bottleneck, Bottleneck::Compute);
    }
}