Skip to main content

mdx_rust_core/
evidence.rs

//! Measured behavioral evidence for autonomous Rust evolution.
//!
//! v1.0 beta makes evidence more granular than a repo-level hint. The refactor
//! planner and autopilot can consume file/function evidence profiles to decide
//! how much autonomy is allowed on a target.
6
7use crate::eval::stable_hash_hex;
8use crate::refactor::{EvidenceAnalysisDepth, EvidenceGrade};
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use std::path::{Component, Path, PathBuf};
12use std::process::{Command, Stdio};
13use std::time::{Duration, Instant};
14
/// Options selecting which evidence commands an evidence run executes.
#[derive(Debug, Clone)]
pub struct EvidenceRunConfig {
    /// Optional file or directory (inside the root) to profile instead of the whole root.
    pub target: Option<PathBuf>,
    /// Request the `cargo llvm-cov` coverage step.
    pub include_coverage: bool,
    /// Request the `cargo mutants` mutation-testing step.
    pub include_mutation: bool,
    /// Request the `cargo semver-checks` step.
    pub include_semver: bool,
    /// Wall-clock budget given to each spawned command.
    pub command_timeout: Duration,
}

impl Default for EvidenceRunConfig {
    /// No target, every optional step disabled, and a 180-second command timeout.
    fn default() -> Self {
        const DEFAULT_COMMAND_TIMEOUT: Duration = Duration::from_secs(180);

        Self {
            command_timeout: DEFAULT_COMMAND_TIMEOUT,
            include_semver: false,
            include_mutation: false,
            include_coverage: false,
            target: None,
        }
    }
}
35
36#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
37pub struct EvidenceRun {
38    pub schema_version: String,
39    pub run_id: String,
40    pub root: String,
41    pub target: Option<String>,
42    pub grade: EvidenceGrade,
43    pub analysis_depth: EvidenceAnalysisDepth,
44    pub metrics: Vec<EvidenceMetric>,
45    #[serde(default)]
46    pub file_profiles: Vec<EvidenceFileProfile>,
47    pub commands: Vec<EvidenceCommandRecord>,
48    pub unlocked_recipe_tiers: Vec<String>,
49    pub unlock_suggestions: Vec<String>,
50    pub note: String,
51    pub artifact_path: Option<String>,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
55pub struct EvidenceFileProfile {
56    pub file: String,
57    pub grade: EvidenceGrade,
58    pub analysis_depth: EvidenceAnalysisDepth,
59    pub signals: Vec<String>,
60    #[serde(default)]
61    pub coverage_percent: Option<f64>,
62    #[serde(default)]
63    pub mutation_score_percent: Option<f64>,
64    pub function_profiles: Vec<EvidenceFunctionProfile>,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
68pub struct EvidenceFunctionProfile {
69    pub name: String,
70    pub line: usize,
71    pub grade: EvidenceGrade,
72    pub signals: Vec<String>,
73    #[serde(default)]
74    pub coverage_percent: Option<f64>,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
78pub struct EvidenceMetric {
79    pub id: String,
80    pub label: String,
81    pub value: f64,
82    pub unit: String,
83    pub source_command: String,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
87pub struct EvidenceCommandRecord {
88    pub id: String,
89    pub command: String,
90    pub skipped: bool,
91    pub skip_reason: Option<String>,
92    pub success: bool,
93    pub timed_out: bool,
94    pub status_code: Option<i32>,
95    pub duration_ms: u128,
96    pub stdout: String,
97    pub stderr: String,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
101pub struct EvidenceArtifactRef {
102    pub run_id: String,
103    pub grade: EvidenceGrade,
104    pub analysis_depth: EvidenceAnalysisDepth,
105    #[serde(default)]
106    pub profiled_files: usize,
107    pub artifact_path: Option<String>,
108}
109
110impl From<&EvidenceRun> for EvidenceArtifactRef {
111    fn from(run: &EvidenceRun) -> Self {
112        Self {
113            run_id: run.run_id.clone(),
114            grade: run.grade,
115            analysis_depth: run.analysis_depth.clone(),
116            profiled_files: run.file_profiles.len(),
117            artifact_path: run.artifact_path.clone(),
118        }
119    }
120}
121
122pub fn run_evidence(
123    root: &Path,
124    artifact_root: Option<&Path>,
125    config: &EvidenceRunConfig,
126) -> anyhow::Result<EvidenceRun> {
127    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
128    let target = config
129        .target
130        .as_ref()
131        .map(|path| resolve_target(&root, path))
132        .transpose()?;
133
134    let mut commands = Vec::new();
135    commands.push(run_command(
136        &root,
137        "cargo-metadata",
138        "cargo metadata --no-deps --format-version 1",
139        config.command_timeout,
140    ));
141    commands.push(run_command(
142        &root,
143        "cargo-test",
144        "cargo test",
145        config.command_timeout,
146    ));
147
148    if config.include_coverage {
149        commands.push(run_optional_cargo_subcommand(
150            &root,
151            "cargo-llvm-cov",
152            "coverage",
153            "cargo llvm-cov --workspace --summary-only",
154            config.command_timeout,
155        ));
156    } else {
157        commands.push(skipped_command(
158            "coverage",
159            "cargo llvm-cov --workspace --summary-only",
160            "coverage evidence was not requested",
161        ));
162    }
163
164    if config.include_mutation {
165        commands.push(run_optional_cargo_subcommand(
166            &root,
167            "cargo-mutants",
168            "mutation",
169            "cargo mutants --no-shuffle --timeout 60",
170            config.command_timeout,
171        ));
172    } else {
173        commands.push(skipped_command(
174            "mutation",
175            "cargo mutants --no-shuffle --timeout 60",
176            "mutation evidence was not requested",
177        ));
178    }
179
180    if config.include_semver {
181        commands.push(run_optional_cargo_subcommand(
182            &root,
183            "cargo-semver-checks",
184            "semver-checks",
185            "cargo semver-checks",
186            config.command_timeout,
187        ));
188    } else {
189        commands.push(skipped_command(
190            "semver-checks",
191            "cargo semver-checks",
192            "semver evidence was not requested",
193        ));
194    }
195
196    let grade = grade_from_commands(&commands);
197    let analysis_depth = analysis_depth_for_grade(grade);
198    let metrics = evidence_metrics(&commands);
199    let coverage_percent = metric_value(&metrics, "coverage-percent");
200    let mutation_score_percent = metric_value(&metrics, "mutation-score-percent");
201    let file_profiles = evidence_file_profiles(
202        &root,
203        target.as_deref(),
204        grade,
205        coverage_percent,
206        mutation_score_percent,
207    )?;
208    let mut run = EvidenceRun {
209        schema_version: "1.0".to_string(),
210        run_id: evidence_run_id(&root, target.as_deref(), &commands),
211        root: root.display().to_string(),
212        target: target.as_ref().map(|path| path.display().to_string()),
213        grade,
214        analysis_depth,
215        metrics,
216        file_profiles,
217        commands,
218        unlocked_recipe_tiers: unlocked_recipe_tiers(grade),
219        unlock_suggestions: unlock_suggestions(grade, config),
220        note: evidence_note(grade),
221        artifact_path: None,
222    };
223
224    if let Some(artifact_root) = artifact_root {
225        let path = persist_evidence_run(artifact_root, &run)?;
226        run.artifact_path = Some(path.display().to_string());
227        std::fs::write(&path, serde_json::to_string_pretty(&run)?)?;
228    }
229
230    Ok(run)
231}
232
233pub fn load_latest_evidence(artifact_root: Option<&Path>) -> anyhow::Result<Option<EvidenceRun>> {
234    load_latest_evidence_matching(artifact_root, |_| true)
235}
236
237pub fn load_latest_evidence_for_root(
238    artifact_root: Option<&Path>,
239    root: &Path,
240) -> anyhow::Result<Option<EvidenceRun>> {
241    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
242    load_latest_evidence_matching(artifact_root, |run| run.root == root.display().to_string())
243}
244
245fn load_latest_evidence_matching(
246    artifact_root: Option<&Path>,
247    matches_run: impl Fn(&EvidenceRun) -> bool,
248) -> anyhow::Result<Option<EvidenceRun>> {
249    let Some(artifact_root) = artifact_root else {
250        return Ok(None);
251    };
252    let dir = artifact_root.join("evidence");
253    if !dir.exists() {
254        return Ok(None);
255    }
256
257    let mut entries = Vec::new();
258    for entry in std::fs::read_dir(&dir)? {
259        let entry = entry?;
260        let path = entry.path();
261        if path.extension().is_some_and(|ext| ext == "json") {
262            let modified = entry
263                .metadata()
264                .and_then(|metadata| metadata.modified())
265                .ok();
266            entries.push((modified, path));
267        }
268    }
269    entries.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
270
271    while let Some((_, path)) = entries.pop() {
272        let Ok(content) = std::fs::read_to_string(path) else {
273            continue;
274        };
275        let Ok(run) = serde_json::from_str::<EvidenceRun>(&content) else {
276            continue;
277        };
278        if matches_run(&run) {
279            return Ok(Some(run));
280        }
281    }
282    Ok(None)
283}
284
285fn resolve_target(root: &Path, target: &Path) -> anyhow::Result<PathBuf> {
286    if target
287        .components()
288        .any(|component| matches!(component, Component::ParentDir))
289    {
290        anyhow::bail!(
291            "evidence target must stay inside root: {}",
292            target.display()
293        );
294    }
295    let resolved = if target.is_absolute() {
296        target.to_path_buf()
297    } else {
298        root.join(target)
299    };
300    if !resolved.starts_with(root) {
301        anyhow::bail!("evidence target is outside root: {}", target.display());
302    }
303    Ok(resolved
304        .strip_prefix(root)
305        .unwrap_or(&resolved)
306        .to_path_buf())
307}
308
309fn evidence_file_profiles(
310    root: &Path,
311    target: Option<&Path>,
312    run_grade: EvidenceGrade,
313    coverage_percent: Option<f64>,
314    mutation_score_percent: Option<f64>,
315) -> anyhow::Result<Vec<EvidenceFileProfile>> {
316    let scan_root = target.map_or_else(|| root.to_path_buf(), |target| root.join(target));
317    let mut files = Vec::new();
318    collect_rust_files(&scan_root, &mut files)?;
319    files.sort();
320
321    let mut profiles = Vec::new();
322    for file in files.into_iter().take(250) {
323        let Ok(content) = std::fs::read_to_string(&file) else {
324            continue;
325        };
326        let relative = file
327            .strip_prefix(root)
328            .unwrap_or(&file)
329            .display()
330            .to_string();
331        let has_test_markers = content.contains("#[test]")
332            || content.contains("#[tokio::test]")
333            || content.contains("mod tests")
334            || content.contains("#[cfg(test)]");
335        let mut signals = vec!["cargo metadata collected for workspace".to_string()];
336        if command_grade_implies_tests(run_grade) {
337            signals.push("workspace cargo test passed during evidence run".to_string());
338        }
339        if has_test_markers {
340            signals.push("file contains Rust test markers".to_string());
341        }
342        if let Some(percent) = coverage_percent {
343            signals.push(format!("workspace coverage measured at {percent:.1}%"));
344        }
345        if let Some(percent) = mutation_score_percent {
346            signals.push(format!(
347                "workspace mutation score measured at {percent:.1}%"
348            ));
349        }
350        let file_grade = if run_grade >= EvidenceGrade::Covered {
351            run_grade
352        } else if has_test_markers && run_grade >= EvidenceGrade::Tested {
353            EvidenceGrade::Tested
354        } else {
355            run_grade.min(EvidenceGrade::Compiled)
356        };
357        profiles.push(EvidenceFileProfile {
358            file: relative,
359            grade: file_grade,
360            analysis_depth: analysis_depth_for_grade(file_grade),
361            signals: signals.clone(),
362            coverage_percent,
363            mutation_score_percent,
364            function_profiles: function_profiles(&content, file_grade, &signals, coverage_percent),
365        });
366    }
367
368    Ok(profiles)
369}
370
371fn collect_rust_files(path: &Path, files: &mut Vec<PathBuf>) -> anyhow::Result<()> {
372    if path.is_file() {
373        if path.extension().is_some_and(|extension| extension == "rs") {
374            files.push(path.to_path_buf());
375        }
376        return Ok(());
377    }
378    if !path.is_dir() {
379        return Ok(());
380    }
381    for entry in std::fs::read_dir(path)? {
382        let entry = entry?;
383        let path = entry.path();
384        if path.is_dir() {
385            let name = path
386                .file_name()
387                .and_then(|name| name.to_str())
388                .unwrap_or("");
389            if matches!(name, ".git" | ".mdx-rust" | "target") {
390                continue;
391            }
392            collect_rust_files(&path, files)?;
393        } else if path.extension().is_some_and(|extension| extension == "rs") {
394            files.push(path);
395        }
396    }
397    Ok(())
398}
399
400fn function_profiles(
401    content: &str,
402    file_grade: EvidenceGrade,
403    file_signals: &[String],
404    coverage_percent: Option<f64>,
405) -> Vec<EvidenceFunctionProfile> {
406    content
407        .lines()
408        .enumerate()
409        .filter_map(|(index, line)| {
410            let trimmed = line.trim_start();
411            let name = trimmed
412                .strip_prefix("pub fn ")
413                .or_else(|| trimmed.strip_prefix("pub(crate) fn "))
414                .or_else(|| trimmed.strip_prefix("fn "))?;
415            let name = name
416                .split(['(', '<', ' '])
417                .next()
418                .filter(|name| !name.is_empty())?;
419            Some(EvidenceFunctionProfile {
420                name: name.to_string(),
421                line: index + 1,
422                grade: file_grade,
423                signals: file_signals.to_vec(),
424                coverage_percent,
425            })
426        })
427        .collect()
428}
429
430fn command_grade_implies_tests(grade: EvidenceGrade) -> bool {
431    grade >= EvidenceGrade::Tested
432}
433
434fn run_optional_cargo_subcommand(
435    root: &Path,
436    executable: &str,
437    id: &str,
438    command: &str,
439    timeout: Duration,
440) -> EvidenceCommandRecord {
441    if !executable_exists(executable) {
442        return skipped_command(id, command, &format!("{executable} was not found on PATH"));
443    }
444    run_command(root, id, command, timeout)
445}
446
447fn run_command(root: &Path, id: &str, command: &str, timeout: Duration) -> EvidenceCommandRecord {
448    let started_at = Instant::now();
449    let mut parts = command.split_whitespace();
450    let Some(program) = parts.next() else {
451        return EvidenceCommandRecord {
452            id: id.to_string(),
453            command: command.to_string(),
454            skipped: false,
455            skip_reason: None,
456            success: false,
457            timed_out: false,
458            status_code: None,
459            duration_ms: started_at.elapsed().as_millis(),
460            stdout: String::new(),
461            stderr: "empty evidence command".to_string(),
462        };
463    };
464    let mut child = match Command::new(program)
465        .args(parts)
466        .current_dir(root)
467        .stdout(Stdio::piped())
468        .stderr(Stdio::piped())
469        .spawn()
470    {
471        Ok(child) => child,
472        Err(error) => {
473            return EvidenceCommandRecord {
474                id: id.to_string(),
475                command: command.to_string(),
476                skipped: false,
477                skip_reason: None,
478                success: false,
479                timed_out: false,
480                status_code: None,
481                duration_ms: started_at.elapsed().as_millis(),
482                stdout: String::new(),
483                stderr: error.to_string(),
484            };
485        }
486    };
487
488    let mut timed_out = false;
489    loop {
490        match child.try_wait() {
491            Ok(Some(_)) => break,
492            Ok(None) if started_at.elapsed() >= timeout => {
493                timed_out = true;
494                let _ = child.kill();
495                break;
496            }
497            Ok(None) => std::thread::sleep(Duration::from_millis(50)),
498            Err(error) => {
499                return EvidenceCommandRecord {
500                    id: id.to_string(),
501                    command: command.to_string(),
502                    skipped: false,
503                    skip_reason: None,
504                    success: false,
505                    timed_out: false,
506                    status_code: None,
507                    duration_ms: started_at.elapsed().as_millis(),
508                    stdout: String::new(),
509                    stderr: error.to_string(),
510                };
511            }
512        }
513    }
514
515    match child.wait_with_output() {
516        Ok(output) => EvidenceCommandRecord {
517            id: id.to_string(),
518            command: command.to_string(),
519            skipped: false,
520            skip_reason: None,
521            success: !timed_out && output.status.success(),
522            timed_out,
523            status_code: output.status.code(),
524            duration_ms: started_at.elapsed().as_millis(),
525            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
526            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
527        },
528        Err(error) => EvidenceCommandRecord {
529            id: id.to_string(),
530            command: command.to_string(),
531            skipped: false,
532            skip_reason: None,
533            success: false,
534            timed_out,
535            status_code: None,
536            duration_ms: started_at.elapsed().as_millis(),
537            stdout: String::new(),
538            stderr: error.to_string(),
539        },
540    }
541}
542
543fn skipped_command(id: &str, command: &str, reason: &str) -> EvidenceCommandRecord {
544    EvidenceCommandRecord {
545        id: id.to_string(),
546        command: command.to_string(),
547        skipped: true,
548        skip_reason: Some(reason.to_string()),
549        success: false,
550        timed_out: false,
551        status_code: None,
552        duration_ms: 0,
553        stdout: String::new(),
554        stderr: String::new(),
555    }
556}
557
558fn evidence_metrics(commands: &[EvidenceCommandRecord]) -> Vec<EvidenceMetric> {
559    let mut metrics = Vec::new();
560    if let Some(command) = commands.iter().find(|command| command.id == "coverage") {
561        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
562            metrics.push(EvidenceMetric {
563                id: "coverage-percent".to_string(),
564                label: "Line coverage".to_string(),
565                value: percent,
566                unit: "percent".to_string(),
567                source_command: command.id.clone(),
568            });
569        }
570    }
571    if let Some(command) = commands.iter().find(|command| command.id == "mutation") {
572        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
573            metrics.push(EvidenceMetric {
574                id: "mutation-score-percent".to_string(),
575                label: "Mutation score".to_string(),
576                value: percent,
577                unit: "percent".to_string(),
578                source_command: command.id.clone(),
579            });
580        }
581    }
582    metrics
583}
584
585fn metric_value(metrics: &[EvidenceMetric], id: &str) -> Option<f64> {
586    metrics
587        .iter()
588        .find(|metric| metric.id == id)
589        .map(|metric| metric.value)
590}
591
/// Returns the last percentage (a number immediately followed by `%`) in `output`.
///
/// Only whitespace-separated tokens that end in `%` are considered, so bare
/// counts and durations in tool output are never mistaken for a percentage.
/// Surrounding brackets and trailing punctuation (`(75%),`) are tolerated.
/// Non-finite values (`inf%`, `NaN%`) are ignored. Returns `None` when no
/// percentage is present.
fn last_percent(output: &str) -> Option<f64> {
    output.split_whitespace().rev().find_map(|token| {
        let number = token
            .trim_start_matches(|ch: char| matches!(ch, '(' | '['))
            .trim_end_matches(|ch: char| matches!(ch, ')' | ']' | ',' | ';' | ':' | '.'))
            .strip_suffix('%')?;
        number
            .parse::<f64>()
            .ok()
            .filter(|value| value.is_finite())
    })
}
598
599fn grade_from_commands(commands: &[EvidenceCommandRecord]) -> EvidenceGrade {
600    let metadata_ok = command_success(commands, "cargo-metadata");
601    if !metadata_ok {
602        return EvidenceGrade::None;
603    }
604    let tests_ok = command_success(commands, "cargo-test");
605    if !tests_ok {
606        return EvidenceGrade::Compiled;
607    }
608    let coverage_ok = command_success(commands, "coverage");
609    let mutation_ok = command_success(commands, "mutation");
610    let semver_ok = command_success(commands, "semver-checks");
611    if coverage_ok && mutation_ok && semver_ok {
612        EvidenceGrade::Proven
613    } else if coverage_ok && mutation_ok {
614        EvidenceGrade::Hardened
615    } else if coverage_ok {
616        EvidenceGrade::Covered
617    } else {
618        EvidenceGrade::Tested
619    }
620}
621
622fn command_success(commands: &[EvidenceCommandRecord], id: &str) -> bool {
623    commands
624        .iter()
625        .any(|command| command.id == id && command.success)
626}
627
628fn analysis_depth_for_grade(grade: EvidenceGrade) -> EvidenceAnalysisDepth {
629    match grade {
630        EvidenceGrade::None => EvidenceAnalysisDepth::None,
631        EvidenceGrade::Compiled => EvidenceAnalysisDepth::Mechanical,
632        EvidenceGrade::Tested => EvidenceAnalysisDepth::BoundaryAware,
633        EvidenceGrade::Covered | EvidenceGrade::Hardened | EvidenceGrade::Proven => {
634            EvidenceAnalysisDepth::Structural
635        }
636    }
637}
638
639fn unlocked_recipe_tiers(grade: EvidenceGrade) -> Vec<String> {
640    let mut tiers = Vec::new();
641    if grade >= EvidenceGrade::Compiled {
642        tiers.push("Tier 1 mechanical recipes".to_string());
643    }
644    if grade >= EvidenceGrade::Covered {
645        tiers.push("Tier 2 structural mechanical recipes".to_string());
646    }
647    if grade >= EvidenceGrade::Hardened {
648        tiers.push("Tier 3 semantic planning candidates".to_string());
649    }
650    tiers
651}
652
653fn unlock_suggestions(grade: EvidenceGrade, config: &EvidenceRunConfig) -> Vec<String> {
654    let mut suggestions = Vec::new();
655    if grade < EvidenceGrade::Tested {
656        suggestions.push("Make `cargo test` pass to unlock tested evidence.".to_string());
657    }
658    if !config.include_coverage {
659        suggestions.push(
660            "Run `mdx-rust evidence --include-coverage` after installing cargo-llvm-cov to unlock Tier 2 autonomous recipes.".to_string(),
661        );
662    }
663    if !config.include_mutation {
664        suggestions.push(
665            "Run `mdx-rust evidence --include-mutation` after installing cargo-mutants to unlock hardened autonomy.".to_string(),
666        );
667    }
668    suggestions
669}
670
671fn evidence_note(grade: EvidenceGrade) -> String {
672    match grade {
673        EvidenceGrade::None => "no usable Cargo evidence was collected".to_string(),
674        EvidenceGrade::Compiled => {
675            "Cargo metadata exists, but tests did not pass during evidence collection".to_string()
676        }
677        EvidenceGrade::Tested => {
678            "tests passed; Tier 1 autonomy is allowed and Tier 2 remains gated by coverage"
679                .to_string()
680        }
681        EvidenceGrade::Covered => {
682            "tests and coverage passed; Tier 2 structural mechanical recipes may run".to_string()
683        }
684        EvidenceGrade::Hardened => {
685            "tests, coverage, and mutation evidence passed; hardened autonomy is unlocked"
686                .to_string()
687        }
688        EvidenceGrade::Proven => {
689            "tests, coverage, mutation, and semver evidence passed; highest autonomy is unlocked"
690                .to_string()
691        }
692    }
693}
694
695fn evidence_run_id(
696    root: &Path,
697    target: Option<&Path>,
698    commands: &[EvidenceCommandRecord],
699) -> String {
700    let mut bytes = Vec::new();
701    bytes.extend_from_slice(root.display().to_string().as_bytes());
702    bytes.extend_from_slice(format!("{target:?}").as_bytes());
703    bytes.extend_from_slice(format!("{commands:?}").as_bytes());
704    stable_hash_hex(&bytes)
705}
706
707fn persist_evidence_run(artifact_root: &Path, run: &EvidenceRun) -> anyhow::Result<PathBuf> {
708    let dir = artifact_root.join("evidence");
709    std::fs::create_dir_all(&dir)?;
710    let millis = std::time::SystemTime::now()
711        .duration_since(std::time::UNIX_EPOCH)
712        .map(|duration| duration.as_millis())
713        .unwrap_or(0);
714    Ok(dir.join(format!(
715        "evidence-{millis}-{}.json",
716        sanitize_id(&run.run_id)
717    )))
718}
719
/// Makes `value` safe for use in a file name.
///
/// Every character that is not ASCII alphanumeric becomes `-`, and leading and
/// trailing dashes are then removed.
fn sanitize_id(value: &str) -> String {
    let mut dashed = String::with_capacity(value.len());
    for ch in value.chars() {
        dashed.push(match ch {
            keep if keep.is_ascii_alphanumeric() => keep,
            _ => '-',
        });
    }
    dashed.trim_matches('-').to_owned()
}
728
/// Reports whether an executable called `name` is present in a `PATH` directory.
///
/// Both the bare name and the name with the platform executable suffix
/// (`.exe` on Windows, empty elsewhere) are checked, so `cargo-llvm-cov` is
/// found as `cargo-llvm-cov.exe` on Windows. Returns `false` when `PATH` is
/// unset. Only file existence is checked, not the executable permission bit.
fn executable_exists(name: &str) -> bool {
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };
    let suffixed = format!("{name}{}", std::env::consts::EXE_SUFFIX);
    std::env::split_paths(&path_var).any(|dir| {
        dir.join(name).is_file() || (suffixed != name && dir.join(&suffixed).is_file())
    })
}
735
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record for a command that ran successfully with the given output streams.
    fn finished_record(
        id: &str,
        command: &str,
        stdout: &str,
        stderr: &str,
    ) -> EvidenceCommandRecord {
        EvidenceCommandRecord {
            id: id.to_string(),
            command: command.to_string(),
            skipped: false,
            skip_reason: None,
            success: true,
            timed_out: false,
            status_code: Some(0),
            duration_ms: 12,
            stdout: stdout.to_string(),
            stderr: stderr.to_string(),
        }
    }

    /// Percentages are picked up from stdout (coverage) and from stderr (mutation).
    #[test]
    fn evidence_metrics_parse_percentages_from_tool_output() {
        let commands = vec![
            finished_record(
                "coverage",
                "cargo llvm-cov --workspace --summary-only",
                "total 91.7%",
                "",
            ),
            finished_record(
                "mutation",
                "cargo mutants --no-shuffle --timeout 60",
                "",
                "mutation score 82.5%",
            ),
        ];

        let metrics = evidence_metrics(&commands);
        let has_metric = |id: &str, expected: f64| {
            metrics
                .iter()
                .any(|metric| metric.id == id && (metric.value - expected).abs() < f64::EPSILON)
        };

        assert!(has_metric("coverage-percent", 91.7));
        assert!(has_metric("mutation-score-percent", 82.5));
    }
}
780}