Skip to main content

mdx_rust_core/
evidence.rs

1//! Measured behavioral evidence for autonomous Rust evolution.
2//!
3//! v0.8 makes evidence more granular than a repo-level hint. The refactor
4//! planner and autopilot can consume file/function evidence profiles to decide
5//! how much autonomy is allowed on a target.
6
7use crate::eval::stable_hash_hex;
8use crate::refactor::{EvidenceAnalysisDepth, EvidenceGrade};
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use std::path::{Component, Path, PathBuf};
12use std::process::{Command, Stdio};
13use std::time::{Duration, Instant};
14
/// Options controlling a single evidence collection run.
#[derive(Clone, Debug)]
pub struct EvidenceRunConfig {
    /// Optional path that narrows file profiling. `run_evidence` resolves it
    /// against the root and rejects it if it would leave the root.
    pub target: Option<PathBuf>,
    /// When `true`, `cargo llvm-cov` is attempted; otherwise it is recorded as skipped.
    pub include_coverage: bool,
    /// When `true`, `cargo mutants` is attempted; otherwise it is recorded as skipped.
    pub include_mutation: bool,
    /// When `true`, `cargo semver-checks` is attempted; otherwise it is recorded as skipped.
    pub include_semver: bool,
    /// Wall-clock limit applied to each spawned command individually.
    pub command_timeout: Duration,
}
23
24impl Default for EvidenceRunConfig {
25    fn default() -> Self {
26        Self {
27            target: None,
28            include_coverage: false,
29            include_mutation: false,
30            include_semver: false,
31            command_timeout: Duration::from_secs(180),
32        }
33    }
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
37pub struct EvidenceRun {
38    pub schema_version: String,
39    pub run_id: String,
40    pub root: String,
41    pub target: Option<String>,
42    pub grade: EvidenceGrade,
43    pub analysis_depth: EvidenceAnalysisDepth,
44    pub metrics: Vec<EvidenceMetric>,
45    #[serde(default)]
46    pub file_profiles: Vec<EvidenceFileProfile>,
47    pub commands: Vec<EvidenceCommandRecord>,
48    pub unlocked_recipe_tiers: Vec<String>,
49    pub unlock_suggestions: Vec<String>,
50    pub note: String,
51    pub artifact_path: Option<String>,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
55pub struct EvidenceFileProfile {
56    pub file: String,
57    pub grade: EvidenceGrade,
58    pub analysis_depth: EvidenceAnalysisDepth,
59    pub signals: Vec<String>,
60    pub function_profiles: Vec<EvidenceFunctionProfile>,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
64pub struct EvidenceFunctionProfile {
65    pub name: String,
66    pub line: usize,
67    pub grade: EvidenceGrade,
68    pub signals: Vec<String>,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
72pub struct EvidenceMetric {
73    pub id: String,
74    pub label: String,
75    pub value: f64,
76    pub unit: String,
77    pub source_command: String,
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
81pub struct EvidenceCommandRecord {
82    pub id: String,
83    pub command: String,
84    pub skipped: bool,
85    pub skip_reason: Option<String>,
86    pub success: bool,
87    pub timed_out: bool,
88    pub status_code: Option<i32>,
89    pub duration_ms: u128,
90    pub stdout: String,
91    pub stderr: String,
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
95pub struct EvidenceArtifactRef {
96    pub run_id: String,
97    pub grade: EvidenceGrade,
98    pub analysis_depth: EvidenceAnalysisDepth,
99    #[serde(default)]
100    pub profiled_files: usize,
101    pub artifact_path: Option<String>,
102}
103
104impl From<&EvidenceRun> for EvidenceArtifactRef {
105    fn from(run: &EvidenceRun) -> Self {
106        Self {
107            run_id: run.run_id.clone(),
108            grade: run.grade,
109            analysis_depth: run.analysis_depth.clone(),
110            profiled_files: run.file_profiles.len(),
111            artifact_path: run.artifact_path.clone(),
112        }
113    }
114}
115
116pub fn run_evidence(
117    root: &Path,
118    artifact_root: Option<&Path>,
119    config: &EvidenceRunConfig,
120) -> anyhow::Result<EvidenceRun> {
121    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
122    let target = config
123        .target
124        .as_ref()
125        .map(|path| resolve_target(&root, path))
126        .transpose()?;
127
128    let mut commands = Vec::new();
129    commands.push(run_command(
130        &root,
131        "cargo-metadata",
132        "cargo metadata --no-deps --format-version 1",
133        config.command_timeout,
134    ));
135    commands.push(run_command(
136        &root,
137        "cargo-test",
138        "cargo test",
139        config.command_timeout,
140    ));
141
142    if config.include_coverage {
143        commands.push(run_optional_cargo_subcommand(
144            &root,
145            "cargo-llvm-cov",
146            "coverage",
147            "cargo llvm-cov --workspace --summary-only",
148            config.command_timeout,
149        ));
150    } else {
151        commands.push(skipped_command(
152            "coverage",
153            "cargo llvm-cov --workspace --summary-only",
154            "coverage evidence was not requested",
155        ));
156    }
157
158    if config.include_mutation {
159        commands.push(run_optional_cargo_subcommand(
160            &root,
161            "cargo-mutants",
162            "mutation",
163            "cargo mutants --no-shuffle --timeout 60",
164            config.command_timeout,
165        ));
166    } else {
167        commands.push(skipped_command(
168            "mutation",
169            "cargo mutants --no-shuffle --timeout 60",
170            "mutation evidence was not requested",
171        ));
172    }
173
174    if config.include_semver {
175        commands.push(run_optional_cargo_subcommand(
176            &root,
177            "cargo-semver-checks",
178            "semver-checks",
179            "cargo semver-checks",
180            config.command_timeout,
181        ));
182    } else {
183        commands.push(skipped_command(
184            "semver-checks",
185            "cargo semver-checks",
186            "semver evidence was not requested",
187        ));
188    }
189
190    let grade = grade_from_commands(&commands);
191    let analysis_depth = analysis_depth_for_grade(grade);
192    let metrics = evidence_metrics(&commands);
193    let file_profiles = evidence_file_profiles(&root, target.as_deref(), grade)?;
194    let mut run = EvidenceRun {
195        schema_version: "0.8".to_string(),
196        run_id: evidence_run_id(&root, target.as_deref(), &commands),
197        root: root.display().to_string(),
198        target: target.as_ref().map(|path| path.display().to_string()),
199        grade,
200        analysis_depth,
201        metrics,
202        file_profiles,
203        commands,
204        unlocked_recipe_tiers: unlocked_recipe_tiers(grade),
205        unlock_suggestions: unlock_suggestions(grade, config),
206        note: evidence_note(grade),
207        artifact_path: None,
208    };
209
210    if let Some(artifact_root) = artifact_root {
211        let path = persist_evidence_run(artifact_root, &run)?;
212        run.artifact_path = Some(path.display().to_string());
213        std::fs::write(&path, serde_json::to_string_pretty(&run)?)?;
214    }
215
216    Ok(run)
217}
218
219pub fn load_latest_evidence(artifact_root: Option<&Path>) -> anyhow::Result<Option<EvidenceRun>> {
220    load_latest_evidence_matching(artifact_root, |_| true)
221}
222
223pub fn load_latest_evidence_for_root(
224    artifact_root: Option<&Path>,
225    root: &Path,
226) -> anyhow::Result<Option<EvidenceRun>> {
227    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
228    load_latest_evidence_matching(artifact_root, |run| run.root == root.display().to_string())
229}
230
231fn load_latest_evidence_matching(
232    artifact_root: Option<&Path>,
233    matches_run: impl Fn(&EvidenceRun) -> bool,
234) -> anyhow::Result<Option<EvidenceRun>> {
235    let Some(artifact_root) = artifact_root else {
236        return Ok(None);
237    };
238    let dir = artifact_root.join("evidence");
239    if !dir.exists() {
240        return Ok(None);
241    }
242
243    let mut entries = Vec::new();
244    for entry in std::fs::read_dir(&dir)? {
245        let entry = entry?;
246        let path = entry.path();
247        if path.extension().is_some_and(|ext| ext == "json") {
248            let modified = entry
249                .metadata()
250                .and_then(|metadata| metadata.modified())
251                .ok();
252            entries.push((modified, path));
253        }
254    }
255    entries.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
256
257    while let Some((_, path)) = entries.pop() {
258        let Ok(content) = std::fs::read_to_string(path) else {
259            continue;
260        };
261        let Ok(run) = serde_json::from_str::<EvidenceRun>(&content) else {
262            continue;
263        };
264        if matches_run(&run) {
265            return Ok(Some(run));
266        }
267    }
268    Ok(None)
269}
270
271fn resolve_target(root: &Path, target: &Path) -> anyhow::Result<PathBuf> {
272    if target
273        .components()
274        .any(|component| matches!(component, Component::ParentDir))
275    {
276        anyhow::bail!(
277            "evidence target must stay inside root: {}",
278            target.display()
279        );
280    }
281    let resolved = if target.is_absolute() {
282        target.to_path_buf()
283    } else {
284        root.join(target)
285    };
286    if !resolved.starts_with(root) {
287        anyhow::bail!("evidence target is outside root: {}", target.display());
288    }
289    Ok(resolved
290        .strip_prefix(root)
291        .unwrap_or(&resolved)
292        .to_path_buf())
293}
294
295fn evidence_file_profiles(
296    root: &Path,
297    target: Option<&Path>,
298    run_grade: EvidenceGrade,
299) -> anyhow::Result<Vec<EvidenceFileProfile>> {
300    let scan_root = target.map_or_else(|| root.to_path_buf(), |target| root.join(target));
301    let mut files = Vec::new();
302    collect_rust_files(&scan_root, &mut files)?;
303    files.sort();
304
305    let mut profiles = Vec::new();
306    for file in files.into_iter().take(250) {
307        let Ok(content) = std::fs::read_to_string(&file) else {
308            continue;
309        };
310        let relative = file
311            .strip_prefix(root)
312            .unwrap_or(&file)
313            .display()
314            .to_string();
315        let has_test_markers = content.contains("#[test]")
316            || content.contains("#[tokio::test]")
317            || content.contains("mod tests")
318            || content.contains("#[cfg(test)]");
319        let mut signals = vec!["cargo metadata collected for workspace".to_string()];
320        if command_grade_implies_tests(run_grade) {
321            signals.push("workspace cargo test passed during evidence run".to_string());
322        }
323        if has_test_markers {
324            signals.push("file contains Rust test markers".to_string());
325        }
326        let file_grade = if run_grade >= EvidenceGrade::Covered {
327            run_grade
328        } else if has_test_markers && run_grade >= EvidenceGrade::Tested {
329            EvidenceGrade::Tested
330        } else {
331            run_grade.min(EvidenceGrade::Compiled)
332        };
333        profiles.push(EvidenceFileProfile {
334            file: relative,
335            grade: file_grade,
336            analysis_depth: analysis_depth_for_grade(file_grade),
337            signals: signals.clone(),
338            function_profiles: function_profiles(&content, file_grade, &signals),
339        });
340    }
341
342    Ok(profiles)
343}
344
345fn collect_rust_files(path: &Path, files: &mut Vec<PathBuf>) -> anyhow::Result<()> {
346    if path.is_file() {
347        if path.extension().is_some_and(|extension| extension == "rs") {
348            files.push(path.to_path_buf());
349        }
350        return Ok(());
351    }
352    if !path.is_dir() {
353        return Ok(());
354    }
355    for entry in std::fs::read_dir(path)? {
356        let entry = entry?;
357        let path = entry.path();
358        if path.is_dir() {
359            let name = path
360                .file_name()
361                .and_then(|name| name.to_str())
362                .unwrap_or("");
363            if matches!(name, ".git" | ".mdx-rust" | "target") {
364                continue;
365            }
366            collect_rust_files(&path, files)?;
367        } else if path.extension().is_some_and(|extension| extension == "rs") {
368            files.push(path);
369        }
370    }
371    Ok(())
372}
373
374fn function_profiles(
375    content: &str,
376    file_grade: EvidenceGrade,
377    file_signals: &[String],
378) -> Vec<EvidenceFunctionProfile> {
379    content
380        .lines()
381        .enumerate()
382        .filter_map(|(index, line)| {
383            let trimmed = line.trim_start();
384            let name = trimmed
385                .strip_prefix("pub fn ")
386                .or_else(|| trimmed.strip_prefix("pub(crate) fn "))
387                .or_else(|| trimmed.strip_prefix("fn "))?;
388            let name = name
389                .split(['(', '<', ' '])
390                .next()
391                .filter(|name| !name.is_empty())?;
392            Some(EvidenceFunctionProfile {
393                name: name.to_string(),
394                line: index + 1,
395                grade: file_grade,
396                signals: file_signals.to_vec(),
397            })
398        })
399        .collect()
400}
401
402fn command_grade_implies_tests(grade: EvidenceGrade) -> bool {
403    grade >= EvidenceGrade::Tested
404}
405
406fn run_optional_cargo_subcommand(
407    root: &Path,
408    executable: &str,
409    id: &str,
410    command: &str,
411    timeout: Duration,
412) -> EvidenceCommandRecord {
413    if !executable_exists(executable) {
414        return skipped_command(id, command, &format!("{executable} was not found on PATH"));
415    }
416    run_command(root, id, command, timeout)
417}
418
419fn run_command(root: &Path, id: &str, command: &str, timeout: Duration) -> EvidenceCommandRecord {
420    let started_at = Instant::now();
421    let mut parts = command.split_whitespace();
422    let Some(program) = parts.next() else {
423        return EvidenceCommandRecord {
424            id: id.to_string(),
425            command: command.to_string(),
426            skipped: false,
427            skip_reason: None,
428            success: false,
429            timed_out: false,
430            status_code: None,
431            duration_ms: started_at.elapsed().as_millis(),
432            stdout: String::new(),
433            stderr: "empty evidence command".to_string(),
434        };
435    };
436    let mut child = match Command::new(program)
437        .args(parts)
438        .current_dir(root)
439        .stdout(Stdio::piped())
440        .stderr(Stdio::piped())
441        .spawn()
442    {
443        Ok(child) => child,
444        Err(error) => {
445            return EvidenceCommandRecord {
446                id: id.to_string(),
447                command: command.to_string(),
448                skipped: false,
449                skip_reason: None,
450                success: false,
451                timed_out: false,
452                status_code: None,
453                duration_ms: started_at.elapsed().as_millis(),
454                stdout: String::new(),
455                stderr: error.to_string(),
456            };
457        }
458    };
459
460    let mut timed_out = false;
461    loop {
462        match child.try_wait() {
463            Ok(Some(_)) => break,
464            Ok(None) if started_at.elapsed() >= timeout => {
465                timed_out = true;
466                let _ = child.kill();
467                break;
468            }
469            Ok(None) => std::thread::sleep(Duration::from_millis(50)),
470            Err(error) => {
471                return EvidenceCommandRecord {
472                    id: id.to_string(),
473                    command: command.to_string(),
474                    skipped: false,
475                    skip_reason: None,
476                    success: false,
477                    timed_out: false,
478                    status_code: None,
479                    duration_ms: started_at.elapsed().as_millis(),
480                    stdout: String::new(),
481                    stderr: error.to_string(),
482                };
483            }
484        }
485    }
486
487    match child.wait_with_output() {
488        Ok(output) => EvidenceCommandRecord {
489            id: id.to_string(),
490            command: command.to_string(),
491            skipped: false,
492            skip_reason: None,
493            success: !timed_out && output.status.success(),
494            timed_out,
495            status_code: output.status.code(),
496            duration_ms: started_at.elapsed().as_millis(),
497            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
498            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
499        },
500        Err(error) => EvidenceCommandRecord {
501            id: id.to_string(),
502            command: command.to_string(),
503            skipped: false,
504            skip_reason: None,
505            success: false,
506            timed_out,
507            status_code: None,
508            duration_ms: started_at.elapsed().as_millis(),
509            stdout: String::new(),
510            stderr: error.to_string(),
511        },
512    }
513}
514
515fn skipped_command(id: &str, command: &str, reason: &str) -> EvidenceCommandRecord {
516    EvidenceCommandRecord {
517        id: id.to_string(),
518        command: command.to_string(),
519        skipped: true,
520        skip_reason: Some(reason.to_string()),
521        success: false,
522        timed_out: false,
523        status_code: None,
524        duration_ms: 0,
525        stdout: String::new(),
526        stderr: String::new(),
527    }
528}
529
530fn evidence_metrics(commands: &[EvidenceCommandRecord]) -> Vec<EvidenceMetric> {
531    let mut metrics = Vec::new();
532    if let Some(command) = commands.iter().find(|command| command.id == "coverage") {
533        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
534            metrics.push(EvidenceMetric {
535                id: "coverage-percent".to_string(),
536                label: "Line coverage".to_string(),
537                value: percent,
538                unit: "percent".to_string(),
539                source_command: command.id.clone(),
540            });
541        }
542    }
543    if let Some(command) = commands.iter().find(|command| command.id == "mutation") {
544        if let Some(percent) = last_percent(&format!("{}\n{}", command.stdout, command.stderr)) {
545            metrics.push(EvidenceMetric {
546                id: "mutation-score-percent".to_string(),
547                label: "Mutation score".to_string(),
548                value: percent,
549                unit: "percent".to_string(),
550                source_command: command.id.clone(),
551            });
552        }
553    }
554    metrics
555}
556
/// Returns the last percentage figure found in `output`, if any.
///
/// Only whitespace-separated tokens that end in `%` and parse to a finite
/// number are considered. Plain numbers are ignored so that counts in a
/// tool's summary (for example a trailing `0` column) are not mistaken for a
/// percentage, and non-finite values such as `inf%` or `nan%` are rejected.
fn last_percent(output: &str) -> Option<f64> {
    output.split_whitespace().rev().find_map(|token| {
        if !token.ends_with('%') {
            return None;
        }
        token
            .trim_end_matches('%')
            .parse::<f64>()
            .ok()
            .filter(|value| value.is_finite())
    })
}
563
564fn grade_from_commands(commands: &[EvidenceCommandRecord]) -> EvidenceGrade {
565    let metadata_ok = command_success(commands, "cargo-metadata");
566    if !metadata_ok {
567        return EvidenceGrade::None;
568    }
569    let tests_ok = command_success(commands, "cargo-test");
570    if !tests_ok {
571        return EvidenceGrade::Compiled;
572    }
573    let coverage_ok = command_success(commands, "coverage");
574    let mutation_ok = command_success(commands, "mutation");
575    let semver_ok = command_success(commands, "semver-checks");
576    if coverage_ok && mutation_ok && semver_ok {
577        EvidenceGrade::Proven
578    } else if coverage_ok && mutation_ok {
579        EvidenceGrade::Hardened
580    } else if coverage_ok {
581        EvidenceGrade::Covered
582    } else {
583        EvidenceGrade::Tested
584    }
585}
586
587fn command_success(commands: &[EvidenceCommandRecord], id: &str) -> bool {
588    commands
589        .iter()
590        .any(|command| command.id == id && command.success)
591}
592
593fn analysis_depth_for_grade(grade: EvidenceGrade) -> EvidenceAnalysisDepth {
594    match grade {
595        EvidenceGrade::None => EvidenceAnalysisDepth::None,
596        EvidenceGrade::Compiled => EvidenceAnalysisDepth::Mechanical,
597        EvidenceGrade::Tested => EvidenceAnalysisDepth::BoundaryAware,
598        EvidenceGrade::Covered | EvidenceGrade::Hardened | EvidenceGrade::Proven => {
599            EvidenceAnalysisDepth::Structural
600        }
601    }
602}
603
604fn unlocked_recipe_tiers(grade: EvidenceGrade) -> Vec<String> {
605    let mut tiers = Vec::new();
606    if grade >= EvidenceGrade::Compiled {
607        tiers.push("Tier 1 mechanical recipes".to_string());
608    }
609    if grade >= EvidenceGrade::Covered {
610        tiers.push("Tier 2 structural mechanical recipes".to_string());
611    }
612    if grade >= EvidenceGrade::Hardened {
613        tiers.push("Tier 3 semantic planning candidates".to_string());
614    }
615    tiers
616}
617
618fn unlock_suggestions(grade: EvidenceGrade, config: &EvidenceRunConfig) -> Vec<String> {
619    let mut suggestions = Vec::new();
620    if grade < EvidenceGrade::Tested {
621        suggestions.push("Make `cargo test` pass to unlock tested evidence.".to_string());
622    }
623    if !config.include_coverage {
624        suggestions.push(
625            "Run `mdx-rust evidence --include-coverage` after installing cargo-llvm-cov to unlock Tier 2 autonomous recipes.".to_string(),
626        );
627    }
628    if !config.include_mutation {
629        suggestions.push(
630            "Run `mdx-rust evidence --include-mutation` after installing cargo-mutants to unlock hardened autonomy.".to_string(),
631        );
632    }
633    suggestions
634}
635
636fn evidence_note(grade: EvidenceGrade) -> String {
637    match grade {
638        EvidenceGrade::None => "no usable Cargo evidence was collected".to_string(),
639        EvidenceGrade::Compiled => {
640            "Cargo metadata exists, but tests did not pass during evidence collection".to_string()
641        }
642        EvidenceGrade::Tested => {
643            "tests passed; Tier 1 autonomy is allowed and Tier 2 remains gated by coverage"
644                .to_string()
645        }
646        EvidenceGrade::Covered => {
647            "tests and coverage passed; Tier 2 structural mechanical recipes may run".to_string()
648        }
649        EvidenceGrade::Hardened => {
650            "tests, coverage, and mutation evidence passed; hardened autonomy is unlocked"
651                .to_string()
652        }
653        EvidenceGrade::Proven => {
654            "tests, coverage, mutation, and semver evidence passed; highest autonomy is unlocked"
655                .to_string()
656        }
657    }
658}
659
660fn evidence_run_id(
661    root: &Path,
662    target: Option<&Path>,
663    commands: &[EvidenceCommandRecord],
664) -> String {
665    let mut bytes = Vec::new();
666    bytes.extend_from_slice(root.display().to_string().as_bytes());
667    bytes.extend_from_slice(format!("{target:?}").as_bytes());
668    bytes.extend_from_slice(format!("{commands:?}").as_bytes());
669    stable_hash_hex(&bytes)
670}
671
672fn persist_evidence_run(artifact_root: &Path, run: &EvidenceRun) -> anyhow::Result<PathBuf> {
673    let dir = artifact_root.join("evidence");
674    std::fs::create_dir_all(&dir)?;
675    let millis = std::time::SystemTime::now()
676        .duration_since(std::time::UNIX_EPOCH)
677        .map(|duration| duration.as_millis())
678        .unwrap_or(0);
679    Ok(dir.join(format!(
680        "evidence-{millis}-{}.json",
681        sanitize_id(&run.run_id)
682    )))
683}
684
/// Makes `value` safe for use in a file name.
///
/// Every character that is not an ASCII letter or digit becomes `-`, and
/// leading and trailing dashes are then removed.
fn sanitize_id(value: &str) -> String {
    let mut replaced = String::with_capacity(value.len());
    for ch in value.chars() {
        replaced.push(match ch {
            keep if keep.is_ascii_alphanumeric() => keep,
            _ => '-',
        });
    }
    replaced.trim_matches('-').to_owned()
}
693
/// Reports whether an executable called `name` can be found in a `PATH` directory.
///
/// Besides the bare name, the platform's executable suffix
/// (`std::env::consts::EXE_SUFFIX`, e.g. `.exe` on Windows) is also tried, so
/// tools installed as `cargo-llvm-cov.exe` are detected. On platforms with an
/// empty suffix only the bare name is checked.
///
/// Returns `false` when `PATH` is not set. Only existence as a regular file is
/// checked, not execute permission.
fn executable_exists(name: &str) -> bool {
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };
    let suffix = std::env::consts::EXE_SUFFIX;
    // Only build the suffixed name when it would differ from `name`.
    let suffixed =
        (!suffix.is_empty() && !name.ends_with(suffix)).then(|| format!("{name}{suffix}"));

    std::env::split_paths(&path_var).any(|dir| {
        dir.join(name).is_file()
            || suffixed
                .as_deref()
                .is_some_and(|file_name| dir.join(file_name).is_file())
    })
}
700
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record for a command that ran successfully with the given output.
    fn finished_command(
        id: &str,
        command: &str,
        stdout: &str,
        stderr: &str,
    ) -> EvidenceCommandRecord {
        EvidenceCommandRecord {
            id: id.to_string(),
            command: command.to_string(),
            skipped: false,
            skip_reason: None,
            success: true,
            timed_out: false,
            status_code: Some(0),
            duration_ms: 12,
            stdout: stdout.to_string(),
            stderr: stderr.to_string(),
        }
    }

    /// Checks that `metrics` contains `id` with a value equal to `expected`.
    fn has_metric(metrics: &[EvidenceMetric], id: &str, expected: f64) -> bool {
        metrics
            .iter()
            .any(|metric| metric.id == id && (metric.value - expected).abs() < f64::EPSILON)
    }

    #[test]
    fn evidence_metrics_parse_percentages_from_tool_output() {
        // Coverage reports on stdout, mutation on stderr: both streams must be searched.
        let commands = vec![
            finished_command(
                "coverage",
                "cargo llvm-cov --workspace --summary-only",
                "total 91.7%",
                "",
            ),
            finished_command(
                "mutation",
                "cargo mutants --no-shuffle --timeout 60",
                "",
                "mutation score 82.5%",
            ),
        ];

        let metrics = evidence_metrics(&commands);

        assert!(has_metric(&metrics, "coverage-percent", 91.7));
        assert!(has_metric(&metrics, "mutation-score-percent", 82.5));
    }
}
745}