perspt_agent/
test_runner.rs

1//! Python Test Runner
2//!
3//! Executes pytest in Python workspaces using `uv` as the package manager.
4//! Handles project setup (pyproject.toml) and test execution for V_log calculation.
5//!
6//! Future phases will add support for other languages (Rust, JavaScript, etc.)
7
8use anyhow::{Context, Result};
9use std::path::{Path, PathBuf};
10use std::process::Stdio;
11use tokio::process::Command;
12
13use crate::types::{BehavioralContract, Criticality};
14
/// Result of a test run
///
/// Aggregated pytest counts plus per-failure detail, used downstream
/// for V_log (Logic Energy) calculation.
#[derive(Debug, Clone, Default)]
pub struct TestResults {
    /// Number of passed tests
    pub passed: usize,
    /// Number of failed tests (collection/infrastructure errors are folded in here)
    pub failed: usize,
    /// Number of skipped tests
    pub skipped: usize,
    /// Total tests run (passed + failed + skipped)
    pub total: usize,
    /// Detailed failure information, one entry per pytest FAILED line
    pub failures: Vec<TestFailure>,
    /// Duration in milliseconds
    pub duration_ms: u64,
    /// Raw output (stdout and stderr combined)
    pub output: String,
    /// Whether the test run was successful (no infrastructure errors)
    pub run_succeeded: bool,
}
35
impl TestResults {
    /// Check if all tests passed
    ///
    /// Requires that the run itself completed (`run_succeeded`): a run that
    /// never executed has zero failures but still does not count as passing.
    pub fn all_passed(&self) -> bool {
        self.run_succeeded && self.failed == 0
    }

    /// Get pass rate as a fraction in [0.0, 1.0] (passed / total).
    ///
    /// NOTE(review): despite the earlier "percentage" wording this returns a
    /// fraction, not a value in [0, 100]. An empty run (total == 0) is treated
    /// as fully passing (1.0).
    pub fn pass_rate(&self) -> f32 {
        if self.total == 0 {
            1.0
        } else {
            (self.passed as f32) / (self.total as f32)
        }
    }
}
51
/// Information about a single test failure
///
/// Parsed from a pytest "FAILED file::class::test - message" line.
#[derive(Debug, Clone)]
pub struct TestFailure {
    /// Test name (e.g., "test_divide_by_zero"); the last `::` segment of the node id
    pub name: String,
    /// Test file path (first `::` segment), if the node id contained one
    pub file: Option<String>,
    /// Line number where failure occurred (not currently extracted; always None)
    pub line: Option<u32>,
    /// Error message (text after " - " on the FAILED line; may be empty)
    pub message: String,
    /// Criticality (from weighted tests if matched; defaults to High)
    pub criticality: Criticality,
}
66
/// Python test runner using uv and pytest
///
/// Handles:
/// 1. Checking for pyproject.toml
/// 2. Setting up Python environment via uv
/// 3. Running pytest
/// 4. Parsing results for V_log calculation
pub struct PythonTestRunner {
    /// Working directory (workspace root) in which all uv commands run
    working_dir: PathBuf,
    /// Timeout in seconds for a pytest run (default 300)
    timeout_secs: u64,
    /// Whether to attempt `uv init` automatically if no pyproject.toml exists
    auto_setup: bool,
}
82
83impl PythonTestRunner {
84    /// Create a new Python test runner
85    pub fn new(working_dir: PathBuf) -> Self {
86        Self {
87            working_dir,
88            timeout_secs: 300, // 5 minute default timeout
89            auto_setup: true,
90        }
91    }
92
93    /// Set timeout
94    pub fn with_timeout(mut self, secs: u64) -> Self {
95        self.timeout_secs = secs;
96        self
97    }
98
99    /// Disable auto-setup (don't create pyproject.toml if missing)
100    pub fn without_auto_setup(mut self) -> Self {
101        self.auto_setup = false;
102        self
103    }
104
105    /// Check if workspace has a Python project setup
106    pub fn has_pyproject(&self) -> bool {
107        self.working_dir.join("pyproject.toml").exists()
108    }
109
110    /// Check if workspace has pytest configured
111    pub async fn has_pytest(&self) -> bool {
112        // Check if pytest is in pyproject.toml or can be run
113        let result = Command::new("uv")
114            .args(["run", "pytest", "--version"])
115            .current_dir(&self.working_dir)
116            .stdout(Stdio::null())
117            .stderr(Stdio::null())
118            .status()
119            .await;
120
121        result.map(|s| s.success()).unwrap_or(false)
122    }
123
    /// Initialize the Python environment with uv
    /// NOTE: This assumes pyproject.toml already exists (created by orchestrator's step_init_project)
    ///
    /// Fallback ladder:
    /// 1. No pyproject.toml and `auto_setup` on -> try `uv init`
    /// 2. `uv init` or `uv sync` failure -> install pytest directly via `uv pip`
    ///
    /// # Errors
    /// Fails if no pyproject.toml exists while auto_setup is disabled, if a uv
    /// command cannot be spawned, or if the final pytest install also fails.
    pub async fn setup_environment(&self) -> Result<()> {
        log::info!("Setting up Python environment with uv");

        // Check if pyproject.toml exists; if not, warn and try to proceed
        if !self.has_pyproject() {
            if self.auto_setup {
                log::warn!(
                    "No pyproject.toml found. Project should be initialized via 'uv init' first."
                );
                log::info!("Attempting to run 'uv init' as fallback...");
                let init_output = Command::new("uv")
                    .args(["init"])
                    .current_dir(&self.working_dir)
                    .stdout(Stdio::piped())
                    .stderr(Stdio::piped())
                    .output()
                    .await
                    .context("Failed to run uv init")?;

                if !init_output.status.success() {
                    let stderr = String::from_utf8_lossy(&init_output.stderr);
                    log::warn!("uv init failed: {}", stderr);
                    // Last resort: a bare pytest install may still let tests run.
                    return self.install_pytest_directly().await;
                }
            } else {
                anyhow::bail!(
                    "No pyproject.toml found and auto_setup is disabled. Run 'uv init' first."
                );
            }
        }

        // Sync dependencies (this creates venv and installs deps)
        let output = Command::new("uv")
            .args(["sync", "--dev"])
            .current_dir(&self.working_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run uv sync")?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            log::warn!("uv sync failed: {}", stderr);
            // Try just installing pytest directly
            return self.install_pytest_directly().await;
        }

        log::info!("Python environment ready");
        Ok(())
    }
177
178    /// Install pytest directly without a full project setup
179    async fn install_pytest_directly(&self) -> Result<()> {
180        log::info!("Installing pytest via uv pip");
181
182        let output = Command::new("uv")
183            .args(["pip", "install", "pytest"])
184            .current_dir(&self.working_dir)
185            .stdout(Stdio::piped())
186            .stderr(Stdio::piped())
187            .output()
188            .await
189            .context("Failed to install pytest")?;
190
191        if !output.status.success() {
192            let stderr = String::from_utf8_lossy(&output.stderr);
193            anyhow::bail!("Failed to install pytest: {}", stderr);
194        }
195
196        Ok(())
197    }
198
199    /// Run pytest and parse results
200    ///
201    /// If environment is not set up, will attempt to set it up first.
202    pub async fn run_pytest(&self, test_args: &[&str]) -> Result<TestResults> {
203        log::info!("Running pytest in {}", self.working_dir.display());
204
205        // Ensure environment is set up
206        if !self.has_pytest().await {
207            self.setup_environment().await?;
208        }
209
210        // Build pytest command
211        let mut args = vec!["run", "pytest", "-v", "--tb=short"];
212        args.extend(test_args);
213
214        let start = std::time::Instant::now();
215
216        let output = Command::new("uv")
217            .args(&args)
218            .current_dir(&self.working_dir)
219            .stdout(Stdio::piped())
220            .stderr(Stdio::piped())
221            .output()
222            .await
223            .context("Failed to run pytest")?;
224
225        let duration_ms = start.elapsed().as_millis() as u64;
226        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
227        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
228        let combined = format!("{}\n{}", stdout, stderr);
229
230        log::debug!("pytest exit code: {:?}", output.status.code());
231        if !stdout.is_empty() {
232            log::debug!("pytest stdout:\n{}", stdout);
233        }
234
235        let mut results = self.parse_pytest_output(&combined, duration_ms);
236        results.run_succeeded = true; // We got output, run worked
237
238        // Log summary
239        if results.all_passed() {
240            log::info!("✅ Tests passed: {}/{}", results.passed, results.total);
241        } else {
242            log::info!(
243                "❌ Tests failed: {} passed, {} failed",
244                results.passed,
245                results.failed
246            );
247        }
248
249        Ok(results)
250    }
251
252    /// Run pytest on specific test files
253    pub async fn run_test_files(&self, test_files: &[&Path]) -> Result<TestResults> {
254        let file_args: Vec<&str> = test_files.iter().filter_map(|p| p.to_str()).collect();
255
256        self.run_pytest(&file_args).await
257    }
258
259    /// Parse pytest output into TestResults
260    fn parse_pytest_output(&self, output: &str, duration_ms: u64) -> TestResults {
261        let mut results = TestResults {
262            duration_ms,
263            output: output.to_string(),
264            ..Default::default()
265        };
266
267        // Parse summary line: "X passed, Y failed, Z skipped in 0.12s"
268        for line in output.lines() {
269            let line = line.trim();
270
271            // Look for summary patterns (usually starts with = signs)
272            if (line.contains("passed") || line.contains("failed") || line.contains("error"))
273                && (line.contains(" in ") || line.starts_with('='))
274            {
275                let parts: Vec<&str> = line.split_whitespace().collect();
276                for i in 0..parts.len() {
277                    if parts[i] == "passed" || parts[i] == "passed," {
278                        if i > 0 {
279                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
280                                results.passed = n;
281                            }
282                        }
283                    } else if parts[i] == "failed" || parts[i] == "failed," {
284                        if i > 0 {
285                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
286                                results.failed = n;
287                            }
288                        }
289                    } else if parts[i] == "skipped" || parts[i] == "skipped," {
290                        if i > 0 {
291                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
292                                results.skipped = n;
293                            }
294                        }
295                    } else if (parts[i] == "error" || parts[i] == "errors") && i > 0 {
296                        if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
297                            results.failed += n;
298                        }
299                    }
300                }
301            }
302
303            // Parse individual test failures
304            // "FAILED test_file.py::TestClass::test_method - AssertionError"
305            if line.starts_with("FAILED ") {
306                let failure = self.parse_failure_line(line);
307                results.failures.push(failure);
308            }
309        }
310
311        results.total = results.passed + results.failed + results.skipped;
312        results
313    }
314
315    /// Parse a pytest FAILED line
316    fn parse_failure_line(&self, line: &str) -> TestFailure {
317        // Format: "FAILED test_file.py::TestClass::test_method - Error message"
318        let rest = line.strip_prefix("FAILED ").unwrap_or(line);
319
320        let (test_path, message) = if let Some(idx) = rest.find(" - ") {
321            (&rest[..idx], rest[idx + 3..].to_string())
322        } else {
323            (rest, String::new())
324        };
325
326        // Parse test path (file::class::method or file::method)
327        let parts: Vec<&str> = test_path.split("::").collect();
328        let (file, name) = if parts.len() >= 2 {
329            (
330                Some(parts[0].to_string()),
331                parts.last().unwrap_or(&"").to_string(),
332            )
333        } else {
334            (None, test_path.to_string())
335        };
336
337        TestFailure {
338            name,
339            file,
340            line: None,
341            message,
342            criticality: Criticality::High, // Default, will be updated by match_weighted_tests
343        }
344    }
345
346    /// Calculate V_log (Logic Energy) from test results and behavioral contract
347    /// Uses weighted tests from the contract to determine criticality
348    pub fn calculate_v_log(&self, results: &TestResults, contract: &BehavioralContract) -> f32 {
349        let gamma = contract.gamma(); // Default 2.0
350        let mut v_log = 0.0;
351
352        for failure in &results.failures {
353            // Find matching weighted test from contract
354            let weight = contract
355                .weighted_tests
356                .iter()
357                .find(|wt| {
358                    failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
359                })
360                .map(|wt| wt.criticality.weight())
361                .unwrap_or(Criticality::High.weight()); // Default to High if no match
362
363            v_log += gamma * weight;
364        }
365
366        v_log
367    }
368
369    /// Match test failures with weighted tests from contract to set criticality
370    pub fn match_weighted_tests(&self, results: &mut TestResults, contract: &BehavioralContract) {
371        for failure in &mut results.failures {
372            if let Some(wt) = contract.weighted_tests.iter().find(|wt| {
373                failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
374            }) {
375                failure.criticality = wt.criticality;
376            }
377        }
378    }
379}
380
/// Re-export PythonTestRunner as TestRunner for now.
/// In future phases, we'll add a generic TestRunner trait.
pub type TestRunner = PythonTestRunner;
384
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::WeightedTest;

    // Summary-line parsing: counts for passed/failed/skipped plus derived total.
    #[test]
    fn test_parse_pytest_summary() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let output = "===== 3 passed, 2 failed, 1 skipped in 0.12s =====";
        let results = runner.parse_pytest_output(output, 120);

        assert_eq!(results.passed, 3);
        assert_eq!(results.failed, 2);
        assert_eq!(results.skipped, 1);
        assert_eq!(results.total, 6);
    }

    // FAILED-line parsing: file from the first "::" segment, name from the
    // last, message from the text after " - ".
    #[test]
    fn test_parse_pytest_failure_line() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let line = "FAILED test_calculator.py::TestDivide::test_divide_by_zero - ZeroDivisionError";
        let failure = runner.parse_failure_line(line);

        assert_eq!(failure.name, "test_divide_by_zero");
        assert_eq!(failure.file, Some("test_calculator.py".to_string()));
        assert!(failure.message.contains("ZeroDivisionError"));
    }

    // V_log: one Critical failure matched against the contract.
    #[test]
    fn test_calculate_v_log() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let results = TestResults {
            failures: vec![TestFailure {
                name: "test_critical_feature".to_string(),
                file: None,
                line: None,
                message: String::new(),
                criticality: Criticality::Critical,
            }],
            ..Default::default()
        };

        let mut contract = BehavioralContract::new();
        contract.weighted_tests = vec![WeightedTest {
            test_name: "test_critical_feature".to_string(),
            criticality: Criticality::Critical,
        }];

        let v_log = runner.calculate_v_log(&results, &contract);
        // gamma (2.0) * Critical weight (10.0) = 20.0
        assert!((v_log - 20.0).abs() < 0.01);
    }
}