Skip to main content

mdx_rust_core/
evidence.rs

1//! Measured behavioral evidence for autonomous Rust evolution.
2//!
3//! v0.7 makes evidence a persisted artifact instead of an inferred hint. The
4//! refactor planner and autopilot can consume the latest evidence run to decide
5//! how much autonomy is allowed.
6
7use crate::eval::stable_hash_hex;
8use crate::refactor::{EvidenceAnalysisDepth, EvidenceGrade};
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use std::path::{Component, Path, PathBuf};
12use std::process::{Command, Stdio};
13use std::time::{Duration, Instant};
14
/// Options controlling a single evidence collection run.
#[derive(Debug, Clone)]
pub struct EvidenceRunConfig {
    /// Optional path to focus on. `run_evidence` resolves it against the
    /// workspace root and rejects paths that leave the root.
    pub target: Option<PathBuf>,
    /// When true, `cargo llvm-cov --workspace --summary-only` is attempted.
    pub include_coverage: bool,
    /// When true, `cargo mutants --no-shuffle --timeout 60` is attempted.
    pub include_mutation: bool,
    /// When true, `cargo semver-checks` is attempted.
    pub include_semver: bool,
    /// Wall-clock limit applied to each command individually.
    pub command_timeout: Duration,
}
23
24impl Default for EvidenceRunConfig {
25    fn default() -> Self {
26        Self {
27            target: None,
28            include_coverage: false,
29            include_mutation: false,
30            include_semver: false,
31            command_timeout: Duration::from_secs(180),
32        }
33    }
34}
35
// Persisted result of one evidence collection run.
//
// NOTE: plain `//` comments are used on purpose. `///` doc comments on a
// `JsonSchema` type are picked up by the derive and would change the
// generated schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceRun {
    // Artifact format version; `run_evidence` writes "0.7".
    pub schema_version: String,
    // Hash-derived identifier computed from root, target and command records.
    pub run_id: String,
    // Display form of the (canonicalized when possible) workspace root.
    pub root: String,
    // Root-relative target path, when one was configured.
    pub target: Option<String>,
    // Overall evidence grade derived from which commands succeeded.
    pub grade: EvidenceGrade,
    // Analysis depth mapped from `grade`.
    pub analysis_depth: EvidenceAnalysisDepth,
    // Percentages parsed from the coverage / mutation command output.
    pub metrics: Vec<EvidenceMetric>,
    // One record per evidence command, including skipped ones.
    pub commands: Vec<EvidenceCommandRecord>,
    // Human-readable recipe tiers unlocked by `grade`.
    pub unlocked_recipe_tiers: Vec<String>,
    // Human-readable hints on how to reach a higher grade.
    pub unlock_suggestions: Vec<String>,
    // One-line summary of what `grade` means.
    pub note: String,
    // Where the run was written, when an artifact root was supplied.
    pub artifact_path: Option<String>,
}
51
// A single numeric measurement extracted from an evidence command.
//
// NOTE: plain `//` comments are used on purpose. `///` doc comments on a
// `JsonSchema` type would change the generated schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceMetric {
    // Stable metric identifier, e.g. "coverage-percent".
    pub id: String,
    // Human-readable name, e.g. "Line coverage".
    pub label: String,
    // Measured value, expressed in `unit`.
    pub value: f64,
    // Unit of `value`; the metrics produced in this module use "percent".
    pub unit: String,
    // `id` of the command record the value was parsed from.
    pub source_command: String,
}
60
// Outcome of one evidence command, whether it ran or was skipped.
//
// NOTE: plain `//` comments are used on purpose (doc comments would alter the
// derived JSON schema). Field order is also significant: the `Debug` output of
// these records is hashed into the run id.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceCommandRecord {
    // Logical command identifier, e.g. "cargo-test" or "coverage".
    pub id: String,
    // The command line as a single whitespace-separated string.
    pub command: String,
    // True when the command was not executed at all.
    pub skipped: bool,
    // Why the command was skipped; `None` for executed commands.
    pub skip_reason: Option<String>,
    // True only when the command ran, did not time out, and exited successfully.
    pub success: bool,
    // True when the command was killed for exceeding its timeout.
    pub timed_out: bool,
    // Process exit code when one is available.
    pub status_code: Option<i32>,
    // Wall-clock duration in milliseconds (0 for skipped commands).
    pub duration_ms: u128,
    // Captured standard output, lossily decoded as UTF-8.
    pub stdout: String,
    // Captured standard error, or the spawn/wait error text when the process
    // could not be run.
    pub stderr: String,
}
74
// Lightweight pointer to a persisted evidence run: identity, grade, depth and
// artifact location, without the command records.
//
// NOTE: plain `//` comments are used on purpose. `///` doc comments on a
// `JsonSchema` type would change the generated schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EvidenceArtifactRef {
    // Identifier of the referenced run.
    pub run_id: String,
    // Grade the referenced run achieved.
    pub grade: EvidenceGrade,
    // Analysis depth of the referenced run.
    pub analysis_depth: EvidenceAnalysisDepth,
    // Path of the persisted artifact, if the run was written to disk.
    pub artifact_path: Option<String>,
}
82
83impl From<&EvidenceRun> for EvidenceArtifactRef {
84    fn from(run: &EvidenceRun) -> Self {
85        Self {
86            run_id: run.run_id.clone(),
87            grade: run.grade,
88            analysis_depth: run.analysis_depth.clone(),
89            artifact_path: run.artifact_path.clone(),
90        }
91    }
92}
93
94pub fn run_evidence(
95    root: &Path,
96    artifact_root: Option<&Path>,
97    config: &EvidenceRunConfig,
98) -> anyhow::Result<EvidenceRun> {
99    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
100    let target = config
101        .target
102        .as_ref()
103        .map(|path| resolve_target(&root, path))
104        .transpose()?;
105
106    let mut commands = Vec::new();
107    commands.push(run_command(
108        &root,
109        "cargo-metadata",
110        "cargo metadata --no-deps --format-version 1",
111        config.command_timeout,
112    ));
113    commands.push(run_command(
114        &root,
115        "cargo-test",
116        "cargo test",
117        config.command_timeout,
118    ));
119
120    if config.include_coverage {
121        commands.push(run_optional_cargo_subcommand(
122            &root,
123            "cargo-llvm-cov",
124            "coverage",
125            "cargo llvm-cov --workspace --summary-only",
126            config.command_timeout,
127        ));
128    } else {
129        commands.push(skipped_command(
130            "coverage",
131            "cargo llvm-cov --workspace --summary-only",
132            "coverage evidence was not requested",
133        ));
134    }
135
136    if config.include_mutation {
137        commands.push(run_optional_cargo_subcommand(
138            &root,
139            "cargo-mutants",
140            "mutation",
141            "cargo mutants --no-shuffle --timeout 60",
142            config.command_timeout,
143        ));
144    } else {
145        commands.push(skipped_command(
146            "mutation",
147            "cargo mutants --no-shuffle --timeout 60",
148            "mutation evidence was not requested",
149        ));
150    }
151
152    if config.include_semver {
153        commands.push(run_optional_cargo_subcommand(
154            &root,
155            "cargo-semver-checks",
156            "semver-checks",
157            "cargo semver-checks",
158            config.command_timeout,
159        ));
160    } else {
161        commands.push(skipped_command(
162            "semver-checks",
163            "cargo semver-checks",
164            "semver evidence was not requested",
165        ));
166    }
167
168    let grade = grade_from_commands(&commands);
169    let analysis_depth = analysis_depth_for_grade(grade);
170    let metrics = evidence_metrics(&commands);
171    let mut run = EvidenceRun {
172        schema_version: "0.7".to_string(),
173        run_id: evidence_run_id(&root, target.as_deref(), &commands),
174        root: root.display().to_string(),
175        target: target.as_ref().map(|path| path.display().to_string()),
176        grade,
177        analysis_depth,
178        metrics,
179        commands,
180        unlocked_recipe_tiers: unlocked_recipe_tiers(grade),
181        unlock_suggestions: unlock_suggestions(grade, config),
182        note: evidence_note(grade),
183        artifact_path: None,
184    };
185
186    if let Some(artifact_root) = artifact_root {
187        let path = persist_evidence_run(artifact_root, &run)?;
188        run.artifact_path = Some(path.display().to_string());
189        std::fs::write(&path, serde_json::to_string_pretty(&run)?)?;
190    }
191
192    Ok(run)
193}
194
195pub fn load_latest_evidence(artifact_root: Option<&Path>) -> anyhow::Result<Option<EvidenceRun>> {
196    load_latest_evidence_matching(artifact_root, |_| true)
197}
198
199pub fn load_latest_evidence_for_root(
200    artifact_root: Option<&Path>,
201    root: &Path,
202) -> anyhow::Result<Option<EvidenceRun>> {
203    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
204    load_latest_evidence_matching(artifact_root, |run| run.root == root.display().to_string())
205}
206
207fn load_latest_evidence_matching(
208    artifact_root: Option<&Path>,
209    matches_run: impl Fn(&EvidenceRun) -> bool,
210) -> anyhow::Result<Option<EvidenceRun>> {
211    let Some(artifact_root) = artifact_root else {
212        return Ok(None);
213    };
214    let dir = artifact_root.join("evidence");
215    if !dir.exists() {
216        return Ok(None);
217    }
218
219    let mut entries = Vec::new();
220    for entry in std::fs::read_dir(&dir)? {
221        let entry = entry?;
222        let path = entry.path();
223        if path.extension().is_some_and(|ext| ext == "json") {
224            let modified = entry
225                .metadata()
226                .and_then(|metadata| metadata.modified())
227                .ok();
228            entries.push((modified, path));
229        }
230    }
231    entries.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
232
233    while let Some((_, path)) = entries.pop() {
234        let Ok(content) = std::fs::read_to_string(path) else {
235            continue;
236        };
237        let Ok(run) = serde_json::from_str::<EvidenceRun>(&content) else {
238            continue;
239        };
240        if matches_run(&run) {
241            return Ok(Some(run));
242        }
243    }
244    Ok(None)
245}
246
247fn resolve_target(root: &Path, target: &Path) -> anyhow::Result<PathBuf> {
248    if target
249        .components()
250        .any(|component| matches!(component, Component::ParentDir))
251    {
252        anyhow::bail!(
253            "evidence target must stay inside root: {}",
254            target.display()
255        );
256    }
257    let resolved = if target.is_absolute() {
258        target.to_path_buf()
259    } else {
260        root.join(target)
261    };
262    if !resolved.starts_with(root) {
263        anyhow::bail!("evidence target is outside root: {}", target.display());
264    }
265    Ok(resolved
266        .strip_prefix(root)
267        .unwrap_or(&resolved)
268        .to_path_buf())
269}
270
271fn run_optional_cargo_subcommand(
272    root: &Path,
273    executable: &str,
274    id: &str,
275    command: &str,
276    timeout: Duration,
277) -> EvidenceCommandRecord {
278    if !executable_exists(executable) {
279        return skipped_command(id, command, &format!("{executable} was not found on PATH"));
280    }
281    run_command(root, id, command, timeout)
282}
283
284fn run_command(root: &Path, id: &str, command: &str, timeout: Duration) -> EvidenceCommandRecord {
285    let started_at = Instant::now();
286    let mut parts = command.split_whitespace();
287    let Some(program) = parts.next() else {
288        return EvidenceCommandRecord {
289            id: id.to_string(),
290            command: command.to_string(),
291            skipped: false,
292            skip_reason: None,
293            success: false,
294            timed_out: false,
295            status_code: None,
296            duration_ms: started_at.elapsed().as_millis(),
297            stdout: String::new(),
298            stderr: "empty evidence command".to_string(),
299        };
300    };
301    let mut child = match Command::new(program)
302        .args(parts)
303        .current_dir(root)
304        .stdout(Stdio::piped())
305        .stderr(Stdio::piped())
306        .spawn()
307    {
308        Ok(child) => child,
309        Err(error) => {
310            return EvidenceCommandRecord {
311                id: id.to_string(),
312                command: command.to_string(),
313                skipped: false,
314                skip_reason: None,
315                success: false,
316                timed_out: false,
317                status_code: None,
318                duration_ms: started_at.elapsed().as_millis(),
319                stdout: String::new(),
320                stderr: error.to_string(),
321            };
322        }
323    };
324
325    let mut timed_out = false;
326    loop {
327        match child.try_wait() {
328            Ok(Some(_)) => break,
329            Ok(None) if started_at.elapsed() >= timeout => {
330                timed_out = true;
331                let _ = child.kill();
332                break;
333            }
334            Ok(None) => std::thread::sleep(Duration::from_millis(50)),
335            Err(error) => {
336                return EvidenceCommandRecord {
337                    id: id.to_string(),
338                    command: command.to_string(),
339                    skipped: false,
340                    skip_reason: None,
341                    success: false,
342                    timed_out: false,
343                    status_code: None,
344                    duration_ms: started_at.elapsed().as_millis(),
345                    stdout: String::new(),
346                    stderr: error.to_string(),
347                };
348            }
349        }
350    }
351
352    match child.wait_with_output() {
353        Ok(output) => EvidenceCommandRecord {
354            id: id.to_string(),
355            command: command.to_string(),
356            skipped: false,
357            skip_reason: None,
358            success: !timed_out && output.status.success(),
359            timed_out,
360            status_code: output.status.code(),
361            duration_ms: started_at.elapsed().as_millis(),
362            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
363            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
364        },
365        Err(error) => EvidenceCommandRecord {
366            id: id.to_string(),
367            command: command.to_string(),
368            skipped: false,
369            skip_reason: None,
370            success: false,
371            timed_out,
372            status_code: None,
373            duration_ms: started_at.elapsed().as_millis(),
374            stdout: String::new(),
375            stderr: error.to_string(),
376        },
377    }
378}
379
380fn skipped_command(id: &str, command: &str, reason: &str) -> EvidenceCommandRecord {
381    EvidenceCommandRecord {
382        id: id.to_string(),
383        command: command.to_string(),
384        skipped: true,
385        skip_reason: Some(reason.to_string()),
386        success: false,
387        timed_out: false,
388        status_code: None,
389        duration_ms: 0,
390        stdout: String::new(),
391        stderr: String::new(),
392    }
393}
394
395fn evidence_metrics(commands: &[EvidenceCommandRecord]) -> Vec<EvidenceMetric> {
396    let mut metrics = Vec::new();
397    if let Some(command) = commands.iter().find(|command| command.id == "coverage") {
398        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
399            metrics.push(EvidenceMetric {
400                id: "coverage-percent".to_string(),
401                label: "Line coverage".to_string(),
402                value: percent,
403                unit: "percent".to_string(),
404                source_command: command.id.clone(),
405            });
406        }
407    }
408    if let Some(command) = commands.iter().find(|command| command.id == "mutation") {
409        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
410            metrics.push(EvidenceMetric {
411                id: "mutation-score-percent".to_string(),
412                label: "Mutation score".to_string(),
413                value: percent,
414                unit: "percent".to_string(),
415                source_command: command.id.clone(),
416            });
417        }
418    }
419    metrics
420}
421
/// Returns the last percentage that appears in `output`, if any.
///
/// A token counts as a percentage only when it ends in `%` and the text before
/// the sign parses as a finite number. Plain counts such as
/// `"3 unviable"`, and words that `f64` happens to accept (`nan`, `inf`,
/// `infinity`), are ignored so they are never mistaken for a score.
/// Surrounding brackets and list punctuation (`(91.7%),`) are tolerated.
fn last_percent(output: &str) -> Option<f64> {
    output.split_whitespace().rev().find_map(|token| {
        let token = token
            .trim_matches(|ch: char| matches!(ch, '(' | ')' | '[' | ']' | ',' | ';' | ':'));
        let number = token.strip_suffix('%')?;
        number
            .parse::<f64>()
            .ok()
            .filter(|value| value.is_finite())
    })
}
428
429fn grade_from_commands(commands: &[EvidenceCommandRecord]) -> EvidenceGrade {
430    let metadata_ok = command_success(commands, "cargo-metadata");
431    if !metadata_ok {
432        return EvidenceGrade::None;
433    }
434    let tests_ok = command_success(commands, "cargo-test");
435    if !tests_ok {
436        return EvidenceGrade::Compiled;
437    }
438    let coverage_ok = command_success(commands, "coverage");
439    let mutation_ok = command_success(commands, "mutation");
440    let semver_ok = command_success(commands, "semver-checks");
441    if coverage_ok && mutation_ok && semver_ok {
442        EvidenceGrade::Proven
443    } else if coverage_ok && mutation_ok {
444        EvidenceGrade::Hardened
445    } else if coverage_ok {
446        EvidenceGrade::Covered
447    } else {
448        EvidenceGrade::Tested
449    }
450}
451
452fn command_success(commands: &[EvidenceCommandRecord], id: &str) -> bool {
453    commands
454        .iter()
455        .any(|command| command.id == id && command.success)
456}
457
458fn analysis_depth_for_grade(grade: EvidenceGrade) -> EvidenceAnalysisDepth {
459    match grade {
460        EvidenceGrade::None => EvidenceAnalysisDepth::None,
461        EvidenceGrade::Compiled => EvidenceAnalysisDepth::Mechanical,
462        EvidenceGrade::Tested => EvidenceAnalysisDepth::BoundaryAware,
463        EvidenceGrade::Covered | EvidenceGrade::Hardened | EvidenceGrade::Proven => {
464            EvidenceAnalysisDepth::Structural
465        }
466    }
467}
468
469fn unlocked_recipe_tiers(grade: EvidenceGrade) -> Vec<String> {
470    let mut tiers = Vec::new();
471    if grade >= EvidenceGrade::Compiled {
472        tiers.push("Tier 1 mechanical recipes".to_string());
473    }
474    if grade >= EvidenceGrade::Covered {
475        tiers.push("Tier 2 structural mechanical recipes".to_string());
476    }
477    if grade >= EvidenceGrade::Hardened {
478        tiers.push("Tier 3 semantic planning candidates".to_string());
479    }
480    tiers
481}
482
483fn unlock_suggestions(grade: EvidenceGrade, config: &EvidenceRunConfig) -> Vec<String> {
484    let mut suggestions = Vec::new();
485    if grade < EvidenceGrade::Tested {
486        suggestions.push("Make `cargo test` pass to unlock tested evidence.".to_string());
487    }
488    if !config.include_coverage {
489        suggestions.push(
490            "Run `mdx-rust evidence --include-coverage` after installing cargo-llvm-cov to unlock Tier 2 autonomous recipes.".to_string(),
491        );
492    }
493    if !config.include_mutation {
494        suggestions.push(
495            "Run `mdx-rust evidence --include-mutation` after installing cargo-mutants to unlock hardened autonomy.".to_string(),
496        );
497    }
498    suggestions
499}
500
501fn evidence_note(grade: EvidenceGrade) -> String {
502    match grade {
503        EvidenceGrade::None => "no usable Cargo evidence was collected".to_string(),
504        EvidenceGrade::Compiled => {
505            "Cargo metadata exists, but tests did not pass during evidence collection".to_string()
506        }
507        EvidenceGrade::Tested => {
508            "tests passed; Tier 1 autonomy is allowed and Tier 2 remains gated by coverage"
509                .to_string()
510        }
511        EvidenceGrade::Covered => {
512            "tests and coverage passed; Tier 2 structural mechanical recipes may run".to_string()
513        }
514        EvidenceGrade::Hardened => {
515            "tests, coverage, and mutation evidence passed; hardened autonomy is unlocked"
516                .to_string()
517        }
518        EvidenceGrade::Proven => {
519            "tests, coverage, mutation, and semver evidence passed; highest autonomy is unlocked"
520                .to_string()
521        }
522    }
523}
524
525fn evidence_run_id(
526    root: &Path,
527    target: Option<&Path>,
528    commands: &[EvidenceCommandRecord],
529) -> String {
530    let mut bytes = Vec::new();
531    bytes.extend_from_slice(root.display().to_string().as_bytes());
532    bytes.extend_from_slice(format!("{target:?}").as_bytes());
533    bytes.extend_from_slice(format!("{commands:?}").as_bytes());
534    stable_hash_hex(&bytes)
535}
536
537fn persist_evidence_run(artifact_root: &Path, run: &EvidenceRun) -> anyhow::Result<PathBuf> {
538    let dir = artifact_root.join("evidence");
539    std::fs::create_dir_all(&dir)?;
540    let millis = std::time::SystemTime::now()
541        .duration_since(std::time::UNIX_EPOCH)
542        .map(|duration| duration.as_millis())
543        .unwrap_or(0);
544    Ok(dir.join(format!(
545        "evidence-{millis}-{}.json",
546        sanitize_id(&run.run_id)
547    )))
548}
549
/// Makes `value` safe for use in a file name: every character that is not an
/// ASCII letter or digit becomes `-`, then leading and trailing dashes are
/// removed.
fn sanitize_id(value: &str) -> String {
    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        sanitized.push(if ch.is_ascii_alphanumeric() { ch } else { '-' });
    }
    sanitized.trim_matches('-').to_owned()
}
558
/// Reports whether a file named `name` exists in any directory on `PATH`.
///
/// On platforms whose executables carry a suffix (e.g. `.exe` on Windows), the
/// suffixed name is also accepted, so `cargo-llvm-cov` matches
/// `cargo-llvm-cov.exe`. Returns `false` when `PATH` is not set. Only file
/// existence is checked, not execute permission.
fn executable_exists(name: &str) -> bool {
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };
    // Empty on Unix-like targets, ".exe" on Windows.
    let suffix = std::env::consts::EXE_SUFFIX;
    std::env::split_paths(&path_var).any(|dir| {
        dir.join(name).is_file()
            || (!suffix.is_empty() && dir.join(format!("{name}{suffix}")).is_file())
    })
}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record for a command that ran successfully and produced the
    /// given output.
    fn finished_record(id: &str, command: &str, stdout: &str, stderr: &str) -> EvidenceCommandRecord {
        EvidenceCommandRecord {
            id: id.to_string(),
            command: command.to_string(),
            skipped: false,
            skip_reason: None,
            success: true,
            timed_out: false,
            status_code: Some(0),
            duration_ms: 12,
            stdout: stdout.to_string(),
            stderr: stderr.to_string(),
        }
    }

    /// True when `metrics` contains `id` with a value equal to `expected`
    /// within floating-point epsilon.
    fn has_metric(metrics: &[EvidenceMetric], id: &str, expected: f64) -> bool {
        metrics
            .iter()
            .any(|metric| metric.id == id && (metric.value - expected).abs() < f64::EPSILON)
    }

    #[test]
    fn evidence_metrics_parse_percentages_from_tool_output() {
        // Coverage reports on stdout, mutation on stderr: both must be read.
        let commands = [
            finished_record(
                "coverage",
                "cargo llvm-cov --workspace --summary-only",
                "total 91.7%",
                "",
            ),
            finished_record(
                "mutation",
                "cargo mutants --no-shuffle --timeout 60",
                "",
                "mutation score 82.5%",
            ),
        ];

        let metrics = evidence_metrics(&commands);

        assert!(has_metric(&metrics, "coverage-percent", 91.7));
        assert!(has_metric(&metrics, "mutation-score-percent", 82.5));
    }
}