ferrum_bench_core/
env.rs

1//! Bench environment snapshot — hardware + software + config — and
2//! the SHA-256 `env_hash` used by `compare-commits.sh` and similar to
3//! filter "apples-to-apples" cells.
4//!
5//! See `docs/bench/PLAYBOOK.md` § 7 schema + § 0.6 vs-vLLM parity:
6//! every cell carries one `Env` block and its `env_hash`. Two cells
7//! with the same hash are guaranteed comparable.
8
9use ferrum_types::RuntimeConfigSnapshot;
10use serde::{Deserialize, Serialize};
11use std::collections::BTreeMap;
12
13/// Snapshot of everything we expect to affect bench outcomes.
14///
15/// `BTreeMap` (not `HashMap`) for `ferrum_env` so JSON serialization
16/// is deterministic — required for `env_hash` reproducibility.
17#[derive(Debug, Clone, Default, Serialize, Deserialize)]
18pub struct Env {
19    /// ferrum git commit short SHA.
20    pub commit_sha: String,
21    /// Stable hardware identifier (e.g. `rtx-4090`, `m1-max-32gb`).
22    pub hw_id: String,
23    /// NVIDIA driver version (e.g. `555.42.06`). Omitted on non-CUDA hosts.
24    #[serde(default, skip_serializing_if = "Option::is_none")]
25    pub driver: Option<String>,
26    /// CUDA toolkit version (e.g. `12.4`). Omitted on non-CUDA hosts.
27    #[serde(default, skip_serializing_if = "Option::is_none")]
28    pub cuda: Option<String>,
29    /// Rust toolchain version (e.g. `1.78.0`).
30    pub rust: String,
31    /// Cargo features enabled on the ferrum build (sorted).
32    pub ferrum_features: Vec<String>,
33
34    /// GPU clock lock value in MHz (`nvidia-smi -lgc <mhz>,<mhz>`).
35    #[serde(default, skip_serializing_if = "Option::is_none")]
36    pub gpu_clock_lock_mhz: Option<u32>,
37    /// Power limit in watts (`nvidia-smi -pl <W>`).
38    #[serde(default, skip_serializing_if = "Option::is_none")]
39    pub gpu_power_limit_w: Option<u32>,
40    /// Persistence mode state (`nvidia-smi -pm 1`).
41    #[serde(default, skip_serializing_if = "Option::is_none")]
42    pub gpu_persistence_mode: Option<bool>,
43    /// Auto-boost state (false ⇒ disabled).
44    #[serde(default, skip_serializing_if = "Option::is_none")]
45    pub gpu_auto_boost: Option<bool>,
46
47    /// Selected `FERRUM_*` env vars affecting runtime (sorted by BTreeMap).
48    pub ferrum_env: BTreeMap<String, String>,
49
50    /// Stable sorted runtime config snapshot for non-default `FERRUM_*` values.
51    #[serde(default)]
52    pub runtime_config: RuntimeConfigSnapshot,
53
54    /// `vllm serve` effective args, populated only for vLLM cells. None
55    /// for ferrum cells — used by the config-parity report block.
56    #[serde(default, skip_serializing_if = "Option::is_none")]
57    pub vllm_args: Option<Vec<String>>,
58}
59
60/// SHA-256 of canonical-JSON-serialized `Env`, prefixed with `sha256:`.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct EnvHash(pub String);
63
64impl EnvHash {
65    pub fn as_str(&self) -> &str {
66        &self.0
67    }
68}
69
70impl std::fmt::Display for EnvHash {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        f.write_str(&self.0)
73    }
74}
75
76impl Env {
77    /// Compute the env_hash. Deterministic given:
78    /// - `BTreeMap` (sorted) for `ferrum_env`
79    /// - explicit struct field order (serde serializes in declaration order)
80    /// - `Vec<String>` fields are caller-sorted (we don't sort here so the
81    ///   raw order is preserved for display — callers should sort `ferrum_features`
82    ///   before constructing `Env` if they want order-independent hashes).
83    pub fn hash(&self) -> EnvHash {
84        use sha2::{Digest, Sha256};
85        let canonical = serde_json::to_vec(self).expect("Env serialization must not fail");
86        let mut hasher = Sha256::new();
87        hasher.update(&canonical);
88        let digest = hasher.finalize();
89        EnvHash(format!("sha256:{:x}", digest))
90    }
91
92    /// Capture commit + rust + features + auto-detected hw/driver/cuda.
93    /// On CUDA hosts populates GPU driver + CUDA toolkit versions via
94    /// `nvidia-smi` and `nvcc --version`; on macOS uses `sysctl`.
95    pub fn capture_minimal(commit_sha: String, ferrum_features: Vec<String>) -> Self {
96        let mut feat = ferrum_features;
97        feat.sort();
98        feat.dedup();
99        Self {
100            commit_sha,
101            hw_id: detect_hw_id(),
102            driver: detect_nvidia_driver(),
103            cuda: detect_cuda_version(),
104            rust: detect_rust_version(),
105            ferrum_features: feat,
106            gpu_clock_lock_mhz: detect_gpu_clock_lock_mhz(),
107            gpu_power_limit_w: detect_gpu_power_limit_w(),
108            gpu_persistence_mode: detect_gpu_persistence(),
109            gpu_auto_boost: None,
110            ferrum_env: capture_ferrum_env(),
111            runtime_config: RuntimeConfigSnapshot::capture_current(),
112            vllm_args: None,
113        }
114    }
115}
116
117/// Heuristic hardware ID. On CUDA hosts uses the GPU name (e.g.
118/// "rtx-4090"); on macOS uses the CPU brand (e.g. "apple-m1-max").
119/// Returns generic "unknown" only when both fail.
120pub fn detect_hw_id() -> String {
121    // Try nvidia-smi first — most reliable on CUDA hosts.
122    if let Some(name) = nvidia_smi_query("name") {
123        // "NVIDIA GeForce RTX 4090" → "rtx-4090"
124        let normalized = name
125            .to_lowercase()
126            .replace("nvidia ", "")
127            .replace("geforce ", "")
128            .trim()
129            .replace(' ', "-");
130        if !normalized.is_empty() {
131            return normalized;
132        }
133    }
134    #[cfg(target_os = "macos")]
135    {
136        if let Some(brand) = std::process::Command::new("sysctl")
137            .args(["-n", "machdep.cpu.brand_string"])
138            .output()
139            .ok()
140            .and_then(|o| String::from_utf8(o.stdout).ok())
141        {
142            return brand.trim().to_lowercase().replace(' ', "-");
143        }
144    }
145    // Linux fallback: read /proc/cpuinfo "model name" first line.
146    if let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") {
147        for line in content.lines() {
148            if let Some(rest) = line.strip_prefix("model name") {
149                if let Some(name) = rest.split(':').nth(1) {
150                    return name.trim().to_lowercase().replace(' ', "-");
151                }
152            }
153        }
154    }
155    "unknown".to_string()
156}
157
158/// Best-effort NVIDIA driver version via `nvidia-smi --query-gpu=driver_version`.
159/// Returns None on hosts without nvidia-smi.
160pub fn detect_nvidia_driver() -> Option<String> {
161    nvidia_smi_query("driver_version")
162}
163
164/// Best-effort CUDA toolkit version via `nvcc --version` or
165/// `nvidia-smi --query-gpu=cuda_version`. Toolkit version (nvcc) is
166/// reported when available, else driver-reported runtime version.
167pub fn detect_cuda_version() -> Option<String> {
168    // Try nvcc first — that's the toolkit (what ferrum compiles against).
169    if let Ok(out) = std::process::Command::new("nvcc").arg("--version").output() {
170        if let Ok(s) = String::from_utf8(out.stdout) {
171            for line in s.lines() {
172                if let Some(idx) = line.find("release ") {
173                    let rest = &line[idx + 8..];
174                    if let Some(comma) = rest.find(',') {
175                        return Some(rest[..comma].trim().to_string());
176                    }
177                }
178            }
179        }
180    }
181    // Fall back to driver-reported runtime CUDA via nvidia-smi.
182    nvidia_smi_query("cuda_version")
183}
184
/// Ask `nvidia-smi` for one `--query-gpu` field and return the trimmed
/// first line of its CSV output (no header, no units).
///
/// Yields `None` when the command cannot be run, exits unsuccessfully,
/// prints non-UTF-8 or nothing, or reports the field as
/// `[Not Supported]` / `[N/A]`.
fn nvidia_smi_query(field: &str) -> Option<String> {
    let query_flag = format!("--query-gpu={field}");
    let output = std::process::Command::new("nvidia-smi")
        .arg(&query_flag)
        .arg("--format=csv,noheader,nounits")
        .output()
        .ok()
        .filter(|o| o.status.success())?;
    let text = String::from_utf8(output.stdout).ok()?;
    // Only the first line is considered, i.e. the first GPU listed.
    match text.lines().next()?.trim() {
        "" | "[Not Supported]" | "[N/A]" => None,
        value => Some(value.to_string()),
    }
}
205
206/// GPU clock lock state (MHz). Returns the *current* graphics clock —
207/// when `nvidia-smi -lgc N,N` is applied this equals the lock value;
208/// without lock it equals whatever the GPU is currently doing.
209pub fn detect_gpu_clock_lock_mhz() -> Option<u32> {
210    nvidia_smi_query("clocks.gr").and_then(|s| s.parse::<u32>().ok())
211}
212
213/// GPU power limit in watts.
214pub fn detect_gpu_power_limit_w() -> Option<u32> {
215    nvidia_smi_query("power.limit").and_then(|s| s.split('.').next()?.parse::<u32>().ok())
216}
217
218/// Persistence mode (true ⇒ enabled).
219pub fn detect_gpu_persistence() -> Option<bool> {
220    nvidia_smi_query("persistence_mode").map(|s| s == "Enabled")
221}
222
/// Rust toolchain version, on a best-effort basis.
///
/// Runs `<rustc> --version`, where `<rustc>` is the `RUSTC` environment
/// variable when set (cargo sets it) and plain `rustc` otherwise, and
/// returns the second whitespace-separated token of the output. Returns
/// `unknown` if the command cannot be run or its output cannot be
/// parsed; a compile-time fallback via build.rs is not wired up yet.
pub fn detect_rust_version() -> String {
    let compiler = match std::env::var("RUSTC") {
        Ok(path) => path,
        Err(_) => String::from("rustc"),
    };
    let stdout = match std::process::Command::new(compiler).arg("--version").output() {
        Ok(output) => output.stdout,
        Err(_) => return "unknown".to_string(),
    };
    let text = match String::from_utf8(stdout) {
        Ok(text) => text,
        Err(_) => return "unknown".to_string(),
    };
    // "rustc 1.78.0 (9b00956e5 2024-04-29)" → "1.78.0"
    match text.split_whitespace().nth(1) {
        Some(version) => version.to_string(),
        None => "unknown".to_string(),
    }
}
240
/// Snapshot all `FERRUM_*` env vars in the current process, sorted by
/// name.
///
/// Iterates with `std::env::vars_os()` rather than `std::env::vars()`:
/// the latter panics when *any* variable in the environment is not
/// valid Unicode, which would abort the capture because of variables
/// that have nothing to do with ferrum. Variables whose name is not
/// valid Unicode are skipped; a `FERRUM_*` value that is not valid
/// Unicode is kept with invalid sequences replaced by U+FFFD.
pub fn capture_ferrum_env() -> BTreeMap<String, String> {
    std::env::vars_os()
        .filter_map(|(key, value)| {
            let key = key.into_string().ok()?;
            if !key.starts_with("FERRUM_") {
                return None;
            }
            Some((key, value.to_string_lossy().into_owned()))
        })
        .collect()
}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated CUDA-host `Env` used as the baseline in every test.
    fn fixture_env() -> Env {
        let ferrum_env = BTreeMap::from([
            (String::from("FERRUM_KV_MAX_BLOCKS"), String::from("2048")),
            (String::from("FERRUM_PREFIX_CACHE"), String::from("0")),
        ]);
        Env {
            commit_sha: String::from("b769bbd"),
            hw_id: String::from("rtx-4090"),
            driver: Some(String::from("555.42.06")),
            cuda: Some(String::from("12.4")),
            rust: String::from("1.78.0"),
            ferrum_features: vec![String::from("cuda"), String::from("vllm-moe-marlin")],
            gpu_clock_lock_mhz: Some(2520),
            gpu_power_limit_w: Some(350),
            gpu_persistence_mode: Some(true),
            gpu_auto_boost: Some(false),
            ferrum_env,
            runtime_config: RuntimeConfigSnapshot::default(),
            vllm_args: None,
        }
    }

    /// Same input twice gives the same `sha256:`-prefixed 64-hex-digit hash.
    #[test]
    fn env_hash_is_deterministic() {
        let first = fixture_env().hash();
        let second = fixture_env().hash();
        assert_eq!(first, second);
        assert!(first.as_str().starts_with("sha256:"));
        assert_eq!(first.as_str().len(), "sha256:".len() + 64);
    }

    /// Changing the clock lock value must change the hash.
    #[test]
    fn env_hash_changes_on_clock_lock() {
        let baseline = fixture_env().hash();
        let relocked = Env {
            gpu_clock_lock_mhz: Some(2400), // different lock value
            ..fixture_env()
        };
        assert_ne!(baseline, relocked.hash());
    }

    /// Adding a `FERRUM_*` variable must change the hash.
    #[test]
    fn env_hash_changes_on_ferrum_env() {
        let baseline = fixture_env().hash();
        let mut with_extra = fixture_env();
        with_extra
            .ferrum_env
            .insert(String::from("FERRUM_VLLM_MOE"), String::from("1"));
        assert_ne!(baseline, with_extra.hash());
    }

    /// `BTreeMap` orders by key, so insertion order must not leak into the hash.
    #[test]
    fn ferrum_env_order_independent() {
        let env_with_insert_order = |pairs: [(&str, &str); 2]| {
            let mut env = fixture_env();
            env.ferrum_env.clear();
            for (key, value) in pairs.iter() {
                env.ferrum_env.insert(key.to_string(), value.to_string());
            }
            env
        };

        let forward = env_with_insert_order([("A", "1"), ("B", "2")]);
        let reversed = env_with_insert_order([("B", "2"), ("A", "1")]);

        assert_eq!(forward.hash(), reversed.hash());
    }
}
ferrum_bench_core/env.rs

ferrum_bench_core/
env.rs