Skip to main content

perspt_agent/
test_runner.rs

1//! Verification Runners
2//!
3//! Provides test, syntax-check, build, and lint execution for language plugins.
4//!
5//! - `PythonTestRunner`: pytest-specific runner with detailed output parsing.
6//! - `RustTestRunner`: cargo-based runner with test output parsing.
7//! - `PluginVerifierRunner` (PSP-5 Phase 4): generic runner driven entirely by
8//!   a plugin's `VerifierProfile`. It executes whatever commands the profile
9//!   declares, including fallback commands, without hardcoding language details.
10//!
11//! The `TestRunnerTrait` is the unified async interface consumed by the orchestrator.
12
13use anyhow::{Context, Result};
14use std::path::{Path, PathBuf};
15use std::process::Stdio;
16use tokio::process::Command;
17
18use crate::types::{BehavioralContract, Criticality};
19use perspt_core::plugin::{VerifierProfile, VerifierStage};
20
21/// Result of a test run
22#[derive(Debug, Clone, Default)]
23pub struct TestResults {
24    /// Number of passed tests
25    pub passed: usize,
26    /// Number of failed tests
27    pub failed: usize,
28    /// Number of skipped tests
29    pub skipped: usize,
30    /// Total tests run
31    pub total: usize,
32    /// Detailed failure information
33    pub failures: Vec<TestFailure>,
34    /// Duration in milliseconds
35    pub duration_ms: u64,
36    /// Raw output
37    pub output: String,
38    /// Whether the test run was successful (no infrastructure errors)
39    pub run_succeeded: bool,
40}
41
42impl TestResults {
43    /// Check if all tests passed
44    pub fn all_passed(&self) -> bool {
45        self.run_succeeded && self.failed == 0
46    }
47
48    /// Get pass rate as percentage
49    pub fn pass_rate(&self) -> f32 {
50        if self.total == 0 {
51            1.0
52        } else {
53            (self.passed as f32) / (self.total as f32)
54        }
55    }
56}
57
58/// Information about a single test failure
59#[derive(Debug, Clone)]
60pub struct TestFailure {
61    /// Test name (e.g., "test_divide_by_zero")
62    pub name: String,
63    /// Test file path
64    pub file: Option<String>,
65    /// Line number where failure occurred
66    pub line: Option<u32>,
67    /// Error message
68    pub message: String,
69    /// Criticality (from weighted tests if matched)
70    pub criticality: Criticality,
71}
72
73fn force_failure_on_nonzero_exit(
74    results: &mut TestResults,
75    command_name: &str,
76    exit_code: Option<i32>,
77    output: &str,
78) {
79    if results.failed == 0 {
80        results.failed = 1;
81    }
82    if results.total == 0 {
83        results.total = results.passed + results.failed + results.skipped;
84    }
85    if results.failures.is_empty() {
86        results.failures.push(TestFailure {
87            name: command_name.to_string(),
88            file: None,
89            line: None,
90            message: format!(
91                "{} exited with code {:?} without a parseable success summary. Output:\n{}",
92                command_name, exit_code, output
93            ),
94            criticality: Criticality::High,
95        });
96    }
97}
98
99/// Python test runner using uv and pytest
100///
101/// Handles:
102/// 1. Checking for pyproject.toml
103/// 2. Setting up Python environment via uv
104/// 3. Running pytest
105/// 4. Parsing results for V_log calculation
106pub struct PythonTestRunner {
107    /// Working directory (workspace root)
108    working_dir: PathBuf,
109    /// Timeout in seconds
110    timeout_secs: u64,
111    /// Whether to auto-setup if no pyproject.toml
112    auto_setup: bool,
113}
114
115impl PythonTestRunner {
116    /// Create a new Python test runner
117    pub fn new(working_dir: PathBuf) -> Self {
118        Self {
119            working_dir,
120            timeout_secs: 300, // 5 minute default timeout
121            auto_setup: true,
122        }
123    }
124
125    /// Set timeout
126    pub fn with_timeout(mut self, secs: u64) -> Self {
127        self.timeout_secs = secs;
128        self
129    }
130
131    /// Disable auto-setup (don't create pyproject.toml if missing)
132    pub fn without_auto_setup(mut self) -> Self {
133        self.auto_setup = false;
134        self
135    }
136
137    /// Check if workspace has a Python project setup
138    pub fn has_pyproject(&self) -> bool {
139        self.working_dir.join("pyproject.toml").exists()
140    }
141
142    /// Check if workspace has pytest configured
143    pub async fn has_pytest(&self) -> bool {
144        // Check if pytest is in pyproject.toml or can be run
145        let result = Command::new("uv")
146            .args(["run", "pytest", "--version"])
147            .current_dir(&self.working_dir)
148            .stdout(Stdio::null())
149            .stderr(Stdio::null())
150            .status()
151            .await;
152
153        result.map(|s| s.success()).unwrap_or(false)
154    }
155
156    /// Initialize the Python environment with uv
157    /// NOTE: This assumes pyproject.toml already exists (created by orchestrator's step_init_project)
158    pub async fn setup_environment(&self) -> Result<()> {
159        log::info!("Setting up Python environment with uv");
160
161        // Check if pyproject.toml exists; if not, warn and try to proceed
162        if !self.has_pyproject() {
163            if self.auto_setup {
164                log::warn!(
165                    "No pyproject.toml found. Project should be initialized via 'uv init' first."
166                );
167                log::info!("Attempting to run 'uv init --lib' as fallback...");
168                let init_output = Command::new("uv")
169                    .args(["init", "--lib"])
170                    .current_dir(&self.working_dir)
171                    .stdout(Stdio::piped())
172                    .stderr(Stdio::piped())
173                    .output()
174                    .await
175                    .context("Failed to run uv init")?;
176
177                if !init_output.status.success() {
178                    let stderr = String::from_utf8_lossy(&init_output.stderr);
179                    log::warn!("uv init failed: {}", stderr);
180                    return self.install_pytest_directly().await;
181                }
182            } else {
183                anyhow::bail!(
184                    "No pyproject.toml found and auto_setup is disabled. Run 'uv init' first."
185                );
186            }
187        }
188
189        // Sync dependencies (this creates venv and installs deps)
190        let output = Command::new("uv")
191            .args(["sync", "--dev"])
192            .current_dir(&self.working_dir)
193            .stdout(Stdio::piped())
194            .stderr(Stdio::piped())
195            .output()
196            .await
197            .context("Failed to run uv sync")?;
198
199        if !output.status.success() {
200            let stderr = String::from_utf8_lossy(&output.stderr);
201            log::warn!("uv sync failed: {}", stderr);
202            // Try just installing pytest directly
203            return self.install_pytest_directly().await;
204        }
205
206        // Ensure pytest is available as a dev dependency.
207        // `uv sync --dev` only installs what's already in pyproject.toml;
208        // for freshly-generated projects pytest may not be declared yet.
209        if !self.has_pytest().await {
210            log::info!("pytest not available after sync — adding as dev dependency");
211            let add_output = Command::new("uv")
212                .args(["add", "--dev", "pytest"])
213                .current_dir(&self.working_dir)
214                .stdout(Stdio::piped())
215                .stderr(Stdio::piped())
216                .output()
217                .await;
218            match add_output {
219                Ok(o) if o.status.success() => {
220                    log::info!("Added pytest as dev dependency");
221                }
222                Ok(o) => {
223                    let stderr = String::from_utf8_lossy(&o.stderr);
224                    log::warn!("uv add --dev pytest failed: {}", stderr);
225                    // Last resort: install directly
226                    return self.install_pytest_directly().await;
227                }
228                Err(e) => {
229                    log::warn!("Failed to run uv add --dev pytest: {}", e);
230                    return self.install_pytest_directly().await;
231                }
232            }
233        }
234
235        log::info!("Python environment ready");
236        Ok(())
237    }
238
239    /// Install pytest directly without a full project setup
240    async fn install_pytest_directly(&self) -> Result<()> {
241        log::info!("Installing pytest via uv pip");
242
243        let output = Command::new("uv")
244            .args(["pip", "install", "pytest"])
245            .current_dir(&self.working_dir)
246            .stdout(Stdio::piped())
247            .stderr(Stdio::piped())
248            .output()
249            .await
250            .context("Failed to install pytest")?;
251
252        if !output.status.success() {
253            let stderr = String::from_utf8_lossy(&output.stderr);
254            anyhow::bail!("Failed to install pytest: {}", stderr);
255        }
256
257        Ok(())
258    }
259
260    /// Run pytest and parse results
261    ///
262    /// If environment is not set up, will attempt to set it up first.
263    pub async fn run_pytest(&self, test_args: &[&str]) -> Result<TestResults> {
264        log::info!("Running pytest in {}", self.working_dir.display());
265
266        // Ensure environment is set up
267        if !self.has_pytest().await {
268            self.setup_environment().await?;
269        }
270
271        // Build pytest command
272        let mut args = vec!["run", "pytest", "-v", "--tb=short"];
273        args.extend(test_args);
274
275        let start = std::time::Instant::now();
276
277        let output = Command::new("uv")
278            .args(&args)
279            .current_dir(&self.working_dir)
280            .stdout(Stdio::piped())
281            .stderr(Stdio::piped())
282            .output()
283            .await
284            .context("Failed to run pytest")?;
285
286        let duration_ms = start.elapsed().as_millis() as u64;
287        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
288        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
289        let combined = format!("{}\n{}", stdout, stderr);
290
291        log::debug!("pytest exit code: {:?}", output.status.code());
292        if !stdout.is_empty() {
293            log::debug!("pytest stdout:\n{}", stdout);
294        }
295
296        let mut results = self.parse_pytest_output(&combined, duration_ms);
297        results.run_succeeded = true; // We got output, run worked
298        if !output.status.success() {
299            force_failure_on_nonzero_exit(&mut results, "pytest", output.status.code(), &combined);
300        }
301
302        // Log summary
303        if results.all_passed() {
304            log::info!("✅ Tests passed: {}/{}", results.passed, results.total);
305        } else {
306            log::info!(
307                "❌ Tests failed: {} passed, {} failed",
308                results.passed,
309                results.failed
310            );
311        }
312
313        Ok(results)
314    }
315
316    /// Run pytest on specific test files
317    pub async fn run_test_files(&self, test_files: &[&Path]) -> Result<TestResults> {
318        let file_args: Vec<&str> = test_files.iter().filter_map(|p| p.to_str()).collect();
319
320        self.run_pytest(&file_args).await
321    }
322
323    /// Parse pytest output into TestResults
324    fn parse_pytest_output(&self, output: &str, duration_ms: u64) -> TestResults {
325        let mut results = TestResults {
326            duration_ms,
327            output: output.to_string(),
328            ..Default::default()
329        };
330
331        // Parse summary line: "X passed, Y failed, Z skipped in 0.12s"
332        for line in output.lines() {
333            let line = line.trim();
334
335            // Look for summary patterns (usually starts with = signs)
336            if (line.contains("passed") || line.contains("failed") || line.contains("error"))
337                && (line.contains(" in ") || line.starts_with('='))
338            {
339                let parts: Vec<&str> = line.split_whitespace().collect();
340                for i in 0..parts.len() {
341                    if parts[i] == "passed" || parts[i] == "passed," {
342                        if i > 0 {
343                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
344                                results.passed = n;
345                            }
346                        }
347                    } else if parts[i] == "failed" || parts[i] == "failed," {
348                        if i > 0 {
349                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
350                                results.failed = n;
351                            }
352                        }
353                    } else if parts[i] == "skipped" || parts[i] == "skipped," {
354                        if i > 0 {
355                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
356                                results.skipped = n;
357                            }
358                        }
359                    } else if (parts[i] == "error" || parts[i] == "errors") && i > 0 {
360                        if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
361                            results.failed += n;
362                        }
363                    }
364                }
365            }
366
367            // Parse individual test failures
368            // "FAILED test_file.py::TestClass::test_method - AssertionError"
369            if line.starts_with("FAILED ") {
370                let failure = self.parse_failure_line(line);
371                results.failures.push(failure);
372            }
373        }
374
375        results.total = results.passed + results.failed + results.skipped;
376        results
377    }
378
379    /// Parse a pytest FAILED line
380    fn parse_failure_line(&self, line: &str) -> TestFailure {
381        // Format: "FAILED test_file.py::TestClass::test_method - Error message"
382        let rest = line.strip_prefix("FAILED ").unwrap_or(line);
383
384        let (test_path, message) = if let Some(idx) = rest.find(" - ") {
385            (&rest[..idx], rest[idx + 3..].to_string())
386        } else {
387            (rest, String::new())
388        };
389
390        // Parse test path (file::class::method or file::method)
391        let parts: Vec<&str> = test_path.split("::").collect();
392        let (file, name) = if parts.len() >= 2 {
393            (
394                Some(parts[0].to_string()),
395                parts.last().unwrap_or(&"").to_string(),
396            )
397        } else {
398            (None, test_path.to_string())
399        };
400
401        TestFailure {
402            name,
403            file,
404            line: None,
405            message,
406            criticality: Criticality::High, // Default, will be updated by match_weighted_tests
407        }
408    }
409
410    /// Calculate V_log (Logic Energy) from test results and behavioral contract
411    /// Uses weighted tests from the contract to determine criticality
412    pub fn calculate_v_log(&self, results: &TestResults, contract: &BehavioralContract) -> f32 {
413        let gamma = contract.gamma(); // Default 2.0
414        let mut v_log = 0.0;
415
416        for failure in &results.failures {
417            // Find matching weighted test from contract
418            let weight = contract
419                .weighted_tests
420                .iter()
421                .find(|wt| {
422                    failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
423                })
424                .map(|wt| wt.criticality.weight())
425                .unwrap_or(Criticality::High.weight()); // Default to High if no match
426
427            v_log += gamma * weight;
428        }
429
430        v_log
431    }
432
433    /// Match test failures with weighted tests from contract to set criticality
434    pub fn match_weighted_tests(&self, results: &mut TestResults, contract: &BehavioralContract) {
435        for failure in &mut results.failures {
436            if let Some(wt) = contract.weighted_tests.iter().find(|wt| {
437                failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
438            }) {
439                failure.criticality = wt.criticality;
440            }
441        }
442    }
443}
444
445// =============================================================================
446// PSP-5: Generic Test Runner Trait
447// =============================================================================
448
449/// PSP-5: Language-agnostic test runner trait
450///
451/// Allows the orchestrator to run verification steps through any language's
452/// toolchain without hardcoding Python paths.
453#[async_trait::async_trait]
454pub trait TestRunnerTrait: Send + Sync {
455    /// Run syntax/type check (e.g., `cargo check`, `uv run ty check .`)
456    async fn run_syntax_check(&self) -> Result<TestResults>;
457
458    /// Run the test suite (e.g., `cargo test`, `uv run pytest`)
459    async fn run_tests(&self) -> Result<TestResults>;
460
461    /// Run build check (e.g., `cargo build`)
462    async fn run_build_check(&self) -> Result<TestResults>;
463
464    /// Run lint check (e.g., `cargo clippy`, `uv run ruff check .`)
465    ///
466    /// Default: returns a no-op pass for plugins without a lint stage.
467    async fn run_lint(&self) -> Result<TestResults> {
468        Ok(TestResults {
469            passed: 1,
470            total: 1,
471            run_succeeded: true,
472            output: "No lint stage configured".to_string(),
473            ..Default::default()
474        })
475    }
476
477    /// Run a specific verifier stage by enum variant.
478    ///
479    /// Dispatches to the appropriate method. Convenience for generic callers.
480    async fn run_stage(&self, stage: VerifierStage) -> Result<TestResults> {
481        match stage {
482            VerifierStage::SyntaxCheck => self.run_syntax_check().await,
483            VerifierStage::Build => self.run_build_check().await,
484            VerifierStage::Test => self.run_tests().await,
485            VerifierStage::Lint => self.run_lint().await,
486        }
487    }
488
489    /// Name of the runner (for logging)
490    fn name(&self) -> &str;
491}
492
493#[async_trait::async_trait]
494impl TestRunnerTrait for PythonTestRunner {
495    async fn run_syntax_check(&self) -> Result<TestResults> {
496        // Use ty (via uv) for type checking
497        let output = Command::new("uv")
498            .args(["run", "ty", "check", "."])
499            .current_dir(&self.working_dir)
500            .stdout(Stdio::piped())
501            .stderr(Stdio::piped())
502            .output()
503            .await
504            .context("Failed to run ty check")?;
505
506        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
507        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
508
509        Ok(TestResults {
510            passed: if output.status.success() { 1 } else { 0 },
511            failed: if output.status.success() { 0 } else { 1 },
512            total: 1,
513            run_succeeded: true,
514            output: format!("{}\n{}", stdout, stderr),
515            ..Default::default()
516        })
517    }
518
519    async fn run_tests(&self) -> Result<TestResults> {
520        self.run_pytest(&[]).await
521    }
522
523    async fn run_build_check(&self) -> Result<TestResults> {
524        // Python doesn't have a separate build step
525        Ok(TestResults {
526            passed: 1,
527            total: 1,
528            run_succeeded: true,
529            output: "No build step for Python".to_string(),
530            ..Default::default()
531        })
532    }
533
534    async fn run_lint(&self) -> Result<TestResults> {
535        let output = Command::new("uv")
536            .args(["run", "ruff", "check", "."])
537            .current_dir(&self.working_dir)
538            .stdout(Stdio::piped())
539            .stderr(Stdio::piped())
540            .output()
541            .await
542            .context("Failed to run ruff check")?;
543
544        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
545        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
546
547        Ok(TestResults {
548            passed: if output.status.success() { 1 } else { 0 },
549            failed: if output.status.success() { 0 } else { 1 },
550            total: 1,
551            run_succeeded: true,
552            output: format!("{}\n{}", stdout, stderr),
553            ..Default::default()
554        })
555    }
556
557    fn name(&self) -> &str {
558        "python"
559    }
560}
561
562/// PSP-5: Rust test runner using cargo
563pub struct RustTestRunner {
564    /// Working directory (workspace root)
565    working_dir: PathBuf,
566}
567
568impl RustTestRunner {
569    /// Create a new Rust test runner
570    pub fn new(working_dir: PathBuf) -> Self {
571        Self { working_dir }
572    }
573
574    /// Parse `cargo test` output for pass/fail counts
575    fn parse_cargo_test_output(&self, output: &str) -> TestResults {
576        let mut results = TestResults {
577            output: output.to_string(),
578            run_succeeded: true,
579            ..Default::default()
580        };
581
582        for line in output.lines() {
583            let line = line.trim();
584
585            // Parse "test result: ok. X passed; Y failed; Z ignored"
586            if line.starts_with("test result:") {
587                let parts: Vec<&str> = line.split_whitespace().collect();
588                for i in 0..parts.len() {
589                    if (parts[i] == "passed;" || parts[i] == "passed") && i > 0 {
590                        if let Ok(n) = parts[i - 1].parse::<usize>() {
591                            results.passed = n;
592                        }
593                    } else if (parts[i] == "failed;" || parts[i] == "failed") && i > 0 {
594                        if let Ok(n) = parts[i - 1].parse::<usize>() {
595                            results.failed = n;
596                        }
597                    } else if (parts[i] == "ignored;" || parts[i] == "ignored") && i > 0 {
598                        if let Ok(n) = parts[i - 1].parse::<usize>() {
599                            results.skipped = n;
600                        }
601                    }
602                }
603            }
604        }
605
606        results.total = results.passed + results.failed + results.skipped;
607        results
608    }
609}
610
611#[async_trait::async_trait]
612impl TestRunnerTrait for RustTestRunner {
613    async fn run_syntax_check(&self) -> Result<TestResults> {
614        let output = Command::new("cargo")
615            .args(["check"])
616            .current_dir(&self.working_dir)
617            .stdout(Stdio::piped())
618            .stderr(Stdio::piped())
619            .output()
620            .await
621            .context("Failed to run cargo check")?;
622
623        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
624
625        Ok(TestResults {
626            passed: if output.status.success() { 1 } else { 0 },
627            failed: if output.status.success() { 0 } else { 1 },
628            total: 1,
629            run_succeeded: true,
630            output: stderr,
631            ..Default::default()
632        })
633    }
634
635    async fn run_tests(&self) -> Result<TestResults> {
636        let output = Command::new("cargo")
637            .args(["test"])
638            .current_dir(&self.working_dir)
639            .stdout(Stdio::piped())
640            .stderr(Stdio::piped())
641            .output()
642            .await
643            .context("Failed to run cargo test")?;
644
645        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
646        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
647        let combined = format!("{}\n{}", stdout, stderr);
648
649        let mut results = self.parse_cargo_test_output(&combined);
650        results.run_succeeded = true;
651        if !output.status.success() {
652            force_failure_on_nonzero_exit(
653                &mut results,
654                "cargo test",
655                output.status.code(),
656                &combined,
657            );
658        }
659        Ok(results)
660    }
661
662    async fn run_build_check(&self) -> Result<TestResults> {
663        let output = Command::new("cargo")
664            .args(["build"])
665            .current_dir(&self.working_dir)
666            .stdout(Stdio::piped())
667            .stderr(Stdio::piped())
668            .output()
669            .await
670            .context("Failed to run cargo build")?;
671
672        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
673
674        Ok(TestResults {
675            passed: if output.status.success() { 1 } else { 0 },
676            failed: if output.status.success() { 0 } else { 1 },
677            total: 1,
678            run_succeeded: true,
679            output: stderr,
680            ..Default::default()
681        })
682    }
683
684    async fn run_lint(&self) -> Result<TestResults> {
685        let output = Command::new("cargo")
686            .args(["clippy", "--", "-D", "warnings"])
687            .current_dir(&self.working_dir)
688            .stdout(Stdio::piped())
689            .stderr(Stdio::piped())
690            .output()
691            .await
692            .context("Failed to run cargo clippy")?;
693
694        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
695
696        Ok(TestResults {
697            passed: if output.status.success() { 1 } else { 0 },
698            failed: if output.status.success() { 0 } else { 1 },
699            total: 1,
700            run_succeeded: true,
701            output: stderr,
702            ..Default::default()
703        })
704    }
705
706    fn name(&self) -> &str {
707        "rust"
708    }
709}
710
711// =============================================================================
712// PSP-5 Phase 4: Plugin-Driven Verifier Runner
713// =============================================================================
714
715/// Generic verifier runner driven by a plugin's `VerifierProfile`.
716///
717/// Instead of hardcoding language-specific commands, this runner reads the
718/// profile's `VerifierCapability` entries and executes the best available
719/// command (primary → fallback → skip) for each stage.
720///
721/// For languages with detailed output parsers (e.g., pytest, cargo test),
722/// prefer the language-specific runners. `PluginVerifierRunner` is the
723/// fallback for plugins that don't have a dedicated runner or when the
724/// orchestrator wants uniform dispatch across all detected plugins.
725pub struct PluginVerifierRunner {
726    /// Working directory for command execution.
727    working_dir: PathBuf,
728    /// Snapshot of the plugin's verifier capabilities.
729    profile: VerifierProfile,
730}
731
732impl PluginVerifierRunner {
733    /// Create a new runner from a plugin's verifier profile.
734    pub fn new(working_dir: PathBuf, profile: VerifierProfile) -> Self {
735        Self {
736            working_dir,
737            profile,
738        }
739    }
740
741    /// Execute a shell command string, returning a `TestResults`.
742    ///
743    /// The command is split on whitespace for arg parsing. This is
744    /// intentionally simple; complex pipelines should use `sh -c`.
745    ///
746    /// PSP-5 Phase 4: Commands pass through policy sanitization and
747    /// workspace-bound validation before execution.
748    async fn exec_command(&self, command: &str, stage: VerifierStage) -> Result<TestResults> {
749        // Sanitize command through policy
750        let sr = perspt_policy::sanitize_command(command)?;
751        if sr.rejected {
752            anyhow::bail!(
753                "{} command rejected by policy: {}",
754                stage,
755                sr.rejection_reason.unwrap_or_default()
756            );
757        }
758        for warning in &sr.warnings {
759            log::warn!(
760                "[{}] policy warning for {} stage: {}",
761                self.profile.plugin_name,
762                stage,
763                warning
764            );
765        }
766
767        // Validate workspace bounds
768        perspt_policy::validate_workspace_bound(command, &self.working_dir)?;
769
770        let parts: Vec<&str> = command.split_whitespace().collect();
771        if parts.is_empty() {
772            anyhow::bail!("empty command for stage {}", stage);
773        }
774
775        let program = parts[0];
776        let args = &parts[1..];
777
778        log::info!(
779            "[{}] running {} stage: {}",
780            self.profile.plugin_name,
781            stage,
782            command
783        );
784
785        let output = Command::new(program)
786            .args(args)
787            .current_dir(&self.working_dir)
788            .stdout(Stdio::piped())
789            .stderr(Stdio::piped())
790            .output()
791            .await
792            .with_context(|| format!("Failed to run {} for {} stage", command, stage))?;
793
794        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
795        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
796
797        Ok(TestResults {
798            passed: if output.status.success() { 1 } else { 0 },
799            failed: if output.status.success() { 0 } else { 1 },
800            total: 1,
801            run_succeeded: true,
802            output: format!("{}\n{}", stdout, stderr),
803            ..Default::default()
804        })
805    }
806
807    /// Run a verifier stage using the profile's best available command.
808    ///
809    /// Returns a no-op pass if the stage is not declared or has no available tool.
810    async fn run_profile_stage(&self, stage: VerifierStage) -> Result<TestResults> {
811        let cap = match self.profile.get(stage) {
812            Some(c) => c,
813            None => {
814                return Ok(TestResults {
815                    passed: 1,
816                    total: 1,
817                    run_succeeded: true,
818                    output: format!(
819                        "No {} stage declared for {}",
820                        stage, self.profile.plugin_name
821                    ),
822                    ..Default::default()
823                });
824            }
825        };
826
827        match cap.effective_command() {
828            Some(cmd) => self.exec_command(cmd, stage).await,
829            None => {
830                log::warn!(
831                    "[{}] {} stage declared but no tool available (degraded)",
832                    self.profile.plugin_name,
833                    stage
834                );
835                Ok(TestResults {
836                    passed: 0,
837                    failed: 0,
838                    total: 0,
839                    run_succeeded: false,
840                    output: format!(
841                        "{} stage skipped: no tool available for {}",
842                        stage, self.profile.plugin_name
843                    ),
844                    ..Default::default()
845                })
846            }
847        }
848    }
849
850    /// Run all available stages in order, returning results keyed by stage.
851    pub async fn run_all_stages(&self) -> Vec<(VerifierStage, Result<TestResults>)> {
852        let stages = [
853            VerifierStage::SyntaxCheck,
854            VerifierStage::Build,
855            VerifierStage::Test,
856            VerifierStage::Lint,
857        ];
858        let mut results = Vec::new();
859        for stage in stages {
860            if self.profile.get(stage).is_some() {
861                results.push((stage, self.run_profile_stage(stage).await));
862            }
863        }
864        results
865    }
866
867    /// Get the underlying profile.
868    pub fn profile(&self) -> &VerifierProfile {
869        &self.profile
870    }
871}
872
873#[async_trait::async_trait]
874impl TestRunnerTrait for PluginVerifierRunner {
875    async fn run_syntax_check(&self) -> Result<TestResults> {
876        self.run_profile_stage(VerifierStage::SyntaxCheck).await
877    }
878
879    async fn run_tests(&self) -> Result<TestResults> {
880        self.run_profile_stage(VerifierStage::Test).await
881    }
882
883    async fn run_build_check(&self) -> Result<TestResults> {
884        self.run_profile_stage(VerifierStage::Build).await
885    }
886
887    async fn run_lint(&self) -> Result<TestResults> {
888        self.run_profile_stage(VerifierStage::Lint).await
889    }
890
891    fn name(&self) -> &str {
892        &self.profile.plugin_name
893    }
894}
895
896/// PSP-5: Factory function to create a test runner for a given plugin
897pub fn test_runner_for_plugin(plugin_name: &str, working_dir: PathBuf) -> Box<dyn TestRunnerTrait> {
898    match plugin_name {
899        "rust" => Box::new(RustTestRunner::new(working_dir)),
900        "python" => Box::new(PythonTestRunner::new(working_dir)),
901        _ => Box::new(PythonTestRunner::new(working_dir)), // Default fallback
902    }
903}
904
905/// PSP-5 Phase 4: Create a runner from a verifier profile.
906///
907/// For Rust and Python, this returns the specialised runner (which has
908/// detailed output parsing). For anything else it returns a generic
909/// `PluginVerifierRunner` that executes whatever commands the profile declares.
910pub fn test_runner_for_profile(
911    profile: VerifierProfile,
912    working_dir: PathBuf,
913) -> Box<dyn TestRunnerTrait> {
914    match profile.plugin_name.as_str() {
915        "rust" => Box::new(RustTestRunner::new(working_dir)),
916        "python" => Box::new(PythonTestRunner::new(working_dir)),
917        _ => Box::new(PluginVerifierRunner::new(working_dir, profile)),
918    }
919}
920
/// Backward-compatibility alias: call sites written before per-language
/// runners existed refer to `TestRunner`; it maps to the pytest-based runner.
pub type TestRunner = PythonTestRunner;
923
#[cfg(test)]
mod tests {
    // Unit tests for the specialised runners (pytest / cargo output parsing),
    // the factory functions, and the generic PluginVerifierRunner.
    use super::*;
    use crate::types::WeightedTest;
    use perspt_core::plugin::{
        LanguagePlugin, LspCapability, LspConfig, VerifierCapability, VerifierProfile,
    };

    #[test]
    fn test_parse_pytest_summary() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        // Pytest's terminal summary line; each category count is extracted.
        let output = "===== 3 passed, 2 failed, 1 skipped in 0.12s =====";
        let results = runner.parse_pytest_output(output, 120);

        assert_eq!(results.passed, 3);
        assert_eq!(results.failed, 2);
        assert_eq!(results.skipped, 1);
        // total = passed + failed + skipped
        assert_eq!(results.total, 6);
    }

    #[test]
    fn test_parse_pytest_failure_line() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        // A pytest "FAILED file::Class::test - message" line is split into
        // test name, file, and failure message.
        let line = "FAILED test_calculator.py::TestDivide::test_divide_by_zero - ZeroDivisionError";
        let failure = runner.parse_failure_line(line);

        assert_eq!(failure.name, "test_divide_by_zero");
        assert_eq!(failure.file, Some("test_calculator.py".to_string()));
        assert!(failure.message.contains("ZeroDivisionError"));
    }

    #[test]
    fn test_force_failure_on_nonzero_exit_marks_failure() {
        let mut results = TestResults::default();

        // A nonzero exit code with zero parsed failures (e.g. a collection
        // error) must still be recorded as one synthetic failure.
        force_failure_on_nonzero_exit(&mut results, "pytest", Some(2), "collection error");

        assert_eq!(results.failed, 1);
        assert_eq!(results.total, 1);
        assert_eq!(results.failures.len(), 1);
        assert!(results.failures[0].message.contains("collection error"));
    }

    #[test]
    fn test_calculate_v_log() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        // One failing test marked Critical in the behavioral contract.
        let results = TestResults {
            failures: vec![TestFailure {
                name: "test_critical_feature".to_string(),
                file: None,
                line: None,
                message: String::new(),
                criticality: Criticality::Critical,
            }],
            ..Default::default()
        };

        let mut contract = BehavioralContract::new();
        contract.weighted_tests = vec![WeightedTest {
            test_name: "test_critical_feature".to_string(),
            criticality: Criticality::Critical,
        }];

        let v_log = runner.calculate_v_log(&results, &contract);
        // gamma (2.0) * Critical weight (10.0) = 20.0
        assert!((v_log - 20.0).abs() < 0.01);
    }

    #[test]
    fn test_parse_cargo_test_output() {
        let runner = RustTestRunner::new(PathBuf::from("."));

        // Verbatim `cargo test` output: per-test lines plus the summary line.
        let output = r#"
running 5 tests
test tests::test_add ... ok
test tests::test_sub ... ok
test tests::test_mul ... FAILED
test tests::test_div ... ok
test tests::test_rem ... ignored

test result: ok. 3 passed; 1 failed; 1 ignored; 0 measured; 0 filtered out
"#;
        let results = runner.parse_cargo_test_output(output);
        assert_eq!(results.passed, 3);
        assert_eq!(results.failed, 1);
        assert_eq!(results.skipped, 1);
        assert_eq!(results.total, 5);
    }

    #[test]
    fn test_runner_for_plugin_factory() {
        let rust_runner = test_runner_for_plugin("rust", PathBuf::from("."));
        assert_eq!(rust_runner.name(), "rust");

        let python_runner = test_runner_for_plugin("python", PathBuf::from("."));
        assert_eq!(python_runner.name(), "python");

        // Unknown falls back to Python
        let fallback = test_runner_for_plugin("go", PathBuf::from("."));
        assert_eq!(fallback.name(), "python");
    }

    // =========================================================================
    // PluginVerifierRunner tests
    // =========================================================================

    /// Build a minimal `VerifierProfile` with the given capabilities and a
    /// deliberately-unavailable LSP config (LSP is not exercised here).
    fn make_test_profile(name: &str, caps: Vec<VerifierCapability>) -> VerifierProfile {
        VerifierProfile {
            plugin_name: name.to_string(),
            capabilities: caps,
            lsp: LspCapability {
                primary: LspConfig {
                    server_binary: "test-ls".to_string(),
                    args: vec![],
                    language_id: name.to_string(),
                },
                primary_available: false,
                fallback: None,
                fallback_available: false,
            },
        }
    }

    #[test]
    fn test_plugin_verifier_runner_name() {
        let profile = make_test_profile("go", vec![]);
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        assert_eq!(runner.name(), "go");
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_no_stage_declared() {
        // When no capability is declared for a stage, run_stage returns a no-op pass
        let profile = make_test_profile("go", vec![]);
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_syntax_check().await.unwrap();
        assert_eq!(result.passed, 1);
        assert_eq!(result.total, 1);
        assert!(result.output.contains("No syntax_check stage"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_no_tool_available() {
        // Stage is declared but neither primary nor fallback tool is available
        let profile = make_test_profile(
            "go",
            vec![VerifierCapability {
                stage: VerifierStage::Build,
                command: Some("go build ./...".to_string()),
                available: false,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_build_check().await.unwrap();
        // Degraded outcome: not an Err, but run_succeeded is false.
        assert!(!result.run_succeeded);
        assert!(result.output.contains("no tool available"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_echo_command() {
        // Use `echo` as a trivially-available command to test real execution
        let profile = make_test_profile(
            "echo-lang",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("echo syntax-ok".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_syntax_check().await.unwrap();
        assert_eq!(result.passed, 1);
        assert!(result.run_succeeded);
        assert!(result.output.contains("syntax-ok"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_run_all_stages() {
        let profile = make_test_profile(
            "echo-lang",
            vec![
                VerifierCapability {
                    stage: VerifierStage::SyntaxCheck,
                    command: Some("echo check".to_string()),
                    available: true,
                    fallback_command: None,
                    fallback_available: false,
                },
                VerifierCapability {
                    stage: VerifierStage::Lint,
                    command: Some("echo lint".to_string()),
                    available: true,
                    fallback_command: None,
                    fallback_available: false,
                },
            ],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let results = runner.run_all_stages().await;
        // Only the 2 declared stages should appear
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, VerifierStage::SyntaxCheck);
        assert_eq!(results[1].0, VerifierStage::Lint);
        assert!(results[0].1.is_ok());
        assert!(results[1].1.is_ok());
    }

    #[test]
    fn test_runner_for_profile_factory() {
        use perspt_core::plugin::RustPlugin;
        // Known plugins get specialised runners
        let rust_profile = RustPlugin.verifier_profile();
        let runner = test_runner_for_profile(rust_profile, PathBuf::from("."));
        assert_eq!(runner.name(), "rust");

        // Unknown plugins get PluginVerifierRunner
        let custom = make_test_profile("go", vec![]);
        let runner = test_runner_for_profile(custom, PathBuf::from("."));
        assert_eq!(runner.name(), "go");
    }

    #[tokio::test]
    async fn test_exec_command_rejects_dangerous_pattern() {
        let profile = make_test_profile(
            "danger",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("rm -rf /".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("/tmp"), profile);
        let result = runner.run_syntax_check().await;
        // The command should be rejected by policy sanitisation
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_exec_command_rejects_workspace_escape() {
        let profile = make_test_profile(
            "escape",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("cat /etc/passwd".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("/home/user/project"), profile);
        let result = runner.run_syntax_check().await;
        // The command references a path outside the workspace
        assert!(result.is_err());
    }

    #[test]
    fn test_fallback_command_selected_when_primary_unavailable() {
        // Primary tool missing, fallback present: effective_command picks fallback.
        let cap = VerifierCapability {
            stage: VerifierStage::Test,
            command: Some("uv run pytest".to_string()),
            available: false,
            fallback_command: Some("python -m pytest".to_string()),
            fallback_available: true,
        };
        assert_eq!(cap.effective_command(), Some("python -m pytest"));
    }
}