provable_contracts/
roofline.rs1use std::path::Path;
8
9#[derive(Debug, Clone)]
11pub struct RooflineCeiling {
12 pub model_bytes: f64,
14 pub bw_ceiling: f64,
16 pub compute_ceiling: f64,
18 pub throughput_ceiling: f64,
20 pub bottleneck: Bottleneck,
22 pub contract_id: String,
24}
25
/// The resource that limits throughput in a roofline analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Bottleneck {
    /// The bandwidth ceiling is strictly lower than the compute ceiling.
    Bandwidth,
    /// The compute ceiling is lower than or equal to the bandwidth ceiling.
    Compute,
}
32
/// Peak hardware figures consumed by the roofline calculation.
#[derive(Debug, Clone)]
pub struct HardwareProfile {
    /// Peak memory bandwidth in GB/s (decimal gigabytes: the model size is
    /// converted with a factor of `1e9`).
    pub bandwidth_gb_s: f64,
    /// Peak compute throughput in GFLOP/s (scaled by `1e9` before use).
    pub compute_gflops: f64,
    /// Operations charged per parameter for one token; `compute_roofline`
    /// multiplies this by `total_params` to get the work per token.
    pub ops_per_token: f64,
}
43
44impl HardwareProfile {
45 pub fn apple_m_series() -> Self {
47 Self {
48 bandwidth_gb_s: 100.0, compute_gflops: 1000.0, ops_per_token: 2.0, }
52 }
53
54 pub fn nvidia_a100() -> Self {
56 Self {
57 bandwidth_gb_s: 2039.0, compute_gflops: 19500.0, ops_per_token: 2.0,
60 }
61 }
62}
63
64pub fn compute_roofline(
72 total_params: u64,
73 bits_per_weight: u32,
74 hw: &HardwareProfile,
75) -> RooflineCeiling {
76 #[allow(clippy::cast_precision_loss)]
78 let total_params_f = total_params as f64;
80 let model_bytes = total_params_f * f64::from(bits_per_weight) / 8.0;
81
82 let model_gb = model_bytes / 1e9;
84 let bw_ceiling = if model_gb > 0.0 {
85 hw.bandwidth_gb_s / model_gb
86 } else {
87 f64::INFINITY
88 };
89
90 let compute_ceiling = if hw.ops_per_token > 0.0 {
92 hw.compute_gflops * 1e9 / (total_params_f * hw.ops_per_token)
93 } else {
94 f64::INFINITY
95 };
96
97 let throughput_ceiling = bw_ceiling.min(compute_ceiling);
99 let bottleneck = if bw_ceiling < compute_ceiling {
100 Bottleneck::Bandwidth
101 } else {
102 Bottleneck::Compute
103 };
104
105 RooflineCeiling {
106 model_bytes,
107 bw_ceiling,
108 compute_ceiling,
109 throughput_ceiling,
110 bottleneck,
111 contract_id: "roofline-model-v1".to_string(),
112 }
113}
114
/// Reads the description from `roofline-model-v1.yaml` inside `contracts_dir`.
///
/// The file is scanned line by line rather than parsed as YAML: the first
/// line that, once trimmed, starts with `description:` wins. The remainder of
/// that line is returned with surrounding whitespace and any leading or
/// trailing double quotes removed.
///
/// Returns `None` when the file cannot be read (missing, unreadable, or not
/// valid UTF-8) or when it contains no `description:` line.
pub fn load_roofline_contract(contracts_dir: &Path) -> Option<String> {
    // Read directly instead of probing with `exists()` first: a failed read
    // already covers the missing-file case and leaves no check-then-use gap.
    let content = std::fs::read_to_string(contracts_dir.join("roofline-model-v1.yaml")).ok()?;

    content
        .lines()
        .find_map(|line| line.trim().strip_prefix("description:"))
        .map(|desc| desc.trim().trim_matches('"').to_string())
}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// Profile with the given peaks and 2 operations per parameter per token.
    fn profile(bandwidth_gb_s: f64, compute_gflops: f64) -> HardwareProfile {
        HardwareProfile {
            bandwidth_gb_s,
            compute_gflops,
            ops_per_token: 2.0,
        }
    }

    /// 7B parameters at 4 bits on a 200 GB/s, 5000 GFLOP/s machine: bandwidth-bound.
    #[test]
    fn roofline_7b_q4() {
        let ceiling = compute_roofline(7_000_000_000, 4, &profile(200.0, 5000.0));

        // 7e9 parameters * 4 bits / 8 = 3.5e9 bytes.
        let byte_error = (ceiling.model_bytes - 3_500_000_000.0).abs();
        assert!(byte_error < 1.0);
        assert!(ceiling.bw_ceiling > 0.0);
        assert!(ceiling.compute_ceiling > 0.0);

        let lower_ceiling = ceiling.bw_ceiling.min(ceiling.compute_ceiling);
        assert!((ceiling.throughput_ceiling - lower_ceiling).abs() < f64::EPSILON);
        assert_eq!(ceiling.bottleneck, Bottleneck::Bandwidth);
    }

    /// Every result is tagged with the roofline contract identifier.
    #[test]
    fn roofline_contract_id() {
        let ceiling = compute_roofline(1_000_000, 16, &HardwareProfile::apple_m_series());
        assert_eq!(ceiling.contract_id, "roofline-model-v1");
    }

    /// Huge bandwidth paired with tiny compute must be classified as compute-bound.
    #[test]
    fn bottleneck_classification() {
        let ceiling = compute_roofline(1000, 32, &profile(1000.0, 1.0));
        assert_eq!(ceiling.bottleneck, Bottleneck::Compute);
    }
}