// perspt_agent/test_runner.rs

1//! Python Test Runner
2//!
3//! Executes pytest in Python workspaces using `uv` as the package manager.
4//! Handles project setup (pyproject.toml) and test execution for V_log calculation.
5//!
6//! Future phases will add support for other languages (Rust, JavaScript, etc.)
7
8use anyhow::{Context, Result};
9use std::path::{Path, PathBuf};
10use std::process::Stdio;
11use tokio::process::Command;
12
13use crate::types::{BehavioralContract, Criticality};
14
/// Result of a test run
///
/// Produced by parsing pytest's textual output; the per-category counts
/// come from the summary line and `total` is derived from their sum.
#[derive(Debug, Clone, Default)]
pub struct TestResults {
    /// Number of passed tests
    pub passed: usize,
    /// Number of failed tests (pytest "errors" are folded in here too)
    pub failed: usize,
    /// Number of skipped tests
    pub skipped: usize,
    /// Total tests run (passed + failed + skipped)
    pub total: usize,
    /// Detailed failure information (one entry per pytest `FAILED` line)
    pub failures: Vec<TestFailure>,
    /// Duration in milliseconds (wall-clock time of the pytest invocation)
    pub duration_ms: u64,
    /// Raw output (stdout followed by stderr of the pytest run)
    pub output: String,
    /// Whether the test run was successful (no infrastructure errors)
    pub run_succeeded: bool,
}
35
36impl TestResults {
37    /// Check if all tests passed
38    pub fn all_passed(&self) -> bool {
39        self.run_succeeded && self.failed == 0
40    }
41
42    /// Get pass rate as percentage
43    pub fn pass_rate(&self) -> f32 {
44        if self.total == 0 {
45            1.0
46        } else {
47            (self.passed as f32) / (self.total as f32)
48        }
49    }
50}
51
/// Information about a single test failure
///
/// Parsed from a pytest `FAILED file::Class::method - message` line.
#[derive(Debug, Clone)]
pub struct TestFailure {
    /// Test name (e.g., "test_divide_by_zero") — the last `::` segment
    /// of the pytest node id
    pub name: String,
    /// Test file path (`None` when the node id had no `::` separator)
    pub file: Option<String>,
    /// Line number where failure occurred (not extracted from the
    /// `FAILED` line, so currently always `None` for parsed failures)
    pub line: Option<u32>,
    /// Error message (text after the " - " separator, if present)
    pub message: String,
    /// Criticality (from weighted tests if matched; defaults to High)
    pub criticality: Criticality,
}
66
/// Python test runner using uv and pytest
///
/// Handles:
/// 1. Checking for pyproject.toml
/// 2. Setting up Python environment via uv
/// 3. Running pytest
/// 4. Parsing results for V_log calculation
pub struct PythonTestRunner {
    /// Working directory (workspace root) in which uv/pytest run
    working_dir: PathBuf,
    /// Timeout in seconds (defaults to 300; configurable via `with_timeout`)
    timeout_secs: u64,
    /// Whether to auto-setup (create a pyproject.toml) if none exists
    auto_setup: bool,
}
82
83impl PythonTestRunner {
84    /// Create a new Python test runner
85    pub fn new(working_dir: PathBuf) -> Self {
86        Self {
87            working_dir,
88            timeout_secs: 300, // 5 minute default timeout
89            auto_setup: true,
90        }
91    }
92
93    /// Set timeout
94    pub fn with_timeout(mut self, secs: u64) -> Self {
95        self.timeout_secs = secs;
96        self
97    }
98
99    /// Disable auto-setup (don't create pyproject.toml if missing)
100    pub fn without_auto_setup(mut self) -> Self {
101        self.auto_setup = false;
102        self
103    }
104
105    /// Check if workspace has a Python project setup
106    pub fn has_pyproject(&self) -> bool {
107        self.working_dir.join("pyproject.toml").exists()
108    }
109
110    /// Check if workspace has pytest configured
111    pub async fn has_pytest(&self) -> bool {
112        // Check if pytest is in pyproject.toml or can be run
113        let result = Command::new("uv")
114            .args(["run", "pytest", "--version"])
115            .current_dir(&self.working_dir)
116            .stdout(Stdio::null())
117            .stderr(Stdio::null())
118            .status()
119            .await;
120
121        result.map(|s| s.success()).unwrap_or(false)
122    }
123
124    /// Create a minimal pyproject.toml with pytest dependency
125    pub async fn create_pyproject(&self) -> Result<()> {
126        let pyproject_path = self.working_dir.join("pyproject.toml");
127
128        if pyproject_path.exists() {
129            log::debug!("pyproject.toml already exists");
130            return Ok(());
131        }
132
133        log::info!("Creating minimal pyproject.toml with pytest");
134
135        let content = r#"[project]
136name = "workspace"
137version = "0.1.0"
138requires-python = ">=3.10"
139dependencies = []
140
141[project.optional-dependencies]
142dev = ["pytest>=8.0"]
143
144[tool.pytest.ini_options]
145testpaths = ["tests", "."]
146python_files = ["test_*.py", "*_test.py"]
147python_functions = ["test_*"]
148"#;
149
150        tokio::fs::write(&pyproject_path, content)
151            .await
152            .context("Failed to write pyproject.toml")?;
153
154        Ok(())
155    }
156
157    /// Initialize the Python environment with uv
158    pub async fn setup_environment(&self) -> Result<()> {
159        log::info!("Setting up Python environment with uv");
160
161        // Create pyproject.toml if needed
162        if self.auto_setup && !self.has_pyproject() {
163            self.create_pyproject().await?;
164        }
165
166        // Sync dependencies (this creates venv and installs deps)
167        let output = Command::new("uv")
168            .args(["sync", "--dev"])
169            .current_dir(&self.working_dir)
170            .stdout(Stdio::piped())
171            .stderr(Stdio::piped())
172            .output()
173            .await
174            .context("Failed to run uv sync")?;
175
176        if !output.status.success() {
177            let stderr = String::from_utf8_lossy(&output.stderr);
178            log::warn!("uv sync failed: {}", stderr);
179            // Try just installing pytest directly
180            return self.install_pytest_directly().await;
181        }
182
183        log::info!("Python environment ready");
184        Ok(())
185    }
186
187    /// Install pytest directly without a full project setup
188    async fn install_pytest_directly(&self) -> Result<()> {
189        log::info!("Installing pytest via uv pip");
190
191        let output = Command::new("uv")
192            .args(["pip", "install", "pytest"])
193            .current_dir(&self.working_dir)
194            .stdout(Stdio::piped())
195            .stderr(Stdio::piped())
196            .output()
197            .await
198            .context("Failed to install pytest")?;
199
200        if !output.status.success() {
201            let stderr = String::from_utf8_lossy(&output.stderr);
202            anyhow::bail!("Failed to install pytest: {}", stderr);
203        }
204
205        Ok(())
206    }
207
208    /// Run pytest and parse results
209    ///
210    /// If environment is not set up, will attempt to set it up first.
211    pub async fn run_pytest(&self, test_args: &[&str]) -> Result<TestResults> {
212        log::info!("Running pytest in {}", self.working_dir.display());
213        println!("   ๐Ÿงช Running tests...");
214
215        // Ensure environment is set up
216        if !self.has_pytest().await {
217            self.setup_environment().await?;
218        }
219
220        // Build pytest command
221        let mut args = vec!["run", "pytest", "-v", "--tb=short"];
222        args.extend(test_args);
223
224        let start = std::time::Instant::now();
225
226        let output = Command::new("uv")
227            .args(&args)
228            .current_dir(&self.working_dir)
229            .stdout(Stdio::piped())
230            .stderr(Stdio::piped())
231            .output()
232            .await
233            .context("Failed to run pytest")?;
234
235        let duration_ms = start.elapsed().as_millis() as u64;
236        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
237        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
238        let combined = format!("{}\n{}", stdout, stderr);
239
240        log::debug!("pytest exit code: {:?}", output.status.code());
241        if !stdout.is_empty() {
242            log::debug!("pytest stdout:\n{}", stdout);
243        }
244
245        let mut results = self.parse_pytest_output(&combined, duration_ms);
246        results.run_succeeded = true; // We got output, run worked
247
248        // Print summary
249        if results.all_passed() {
250            println!("   โœ… Tests passed: {}/{}", results.passed, results.total);
251        } else {
252            println!(
253                "   โŒ Tests failed: {} passed, {} failed",
254                results.passed, results.failed
255            );
256        }
257
258        Ok(results)
259    }
260
261    /// Run pytest on specific test files
262    pub async fn run_test_files(&self, test_files: &[&Path]) -> Result<TestResults> {
263        let file_args: Vec<&str> = test_files.iter().filter_map(|p| p.to_str()).collect();
264
265        self.run_pytest(&file_args).await
266    }
267
268    /// Parse pytest output into TestResults
269    fn parse_pytest_output(&self, output: &str, duration_ms: u64) -> TestResults {
270        let mut results = TestResults {
271            duration_ms,
272            output: output.to_string(),
273            ..Default::default()
274        };
275
276        // Parse summary line: "X passed, Y failed, Z skipped in 0.12s"
277        for line in output.lines() {
278            let line = line.trim();
279
280            // Look for summary patterns (usually starts with = signs)
281            if (line.contains("passed") || line.contains("failed") || line.contains("error"))
282                && (line.contains(" in ") || line.starts_with('='))
283            {
284                let parts: Vec<&str> = line.split_whitespace().collect();
285                for i in 0..parts.len() {
286                    if parts[i] == "passed" || parts[i] == "passed," {
287                        if i > 0 {
288                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
289                                results.passed = n;
290                            }
291                        }
292                    } else if parts[i] == "failed" || parts[i] == "failed," {
293                        if i > 0 {
294                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
295                                results.failed = n;
296                            }
297                        }
298                    } else if parts[i] == "skipped" || parts[i] == "skipped," {
299                        if i > 0 {
300                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
301                                results.skipped = n;
302                            }
303                        }
304                    } else if (parts[i] == "error" || parts[i] == "errors") && i > 0 {
305                        if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
306                            results.failed += n;
307                        }
308                    }
309                }
310            }
311
312            // Parse individual test failures
313            // "FAILED test_file.py::TestClass::test_method - AssertionError"
314            if line.starts_with("FAILED ") {
315                let failure = self.parse_failure_line(line);
316                results.failures.push(failure);
317            }
318        }
319
320        results.total = results.passed + results.failed + results.skipped;
321        results
322    }
323
324    /// Parse a pytest FAILED line
325    fn parse_failure_line(&self, line: &str) -> TestFailure {
326        // Format: "FAILED test_file.py::TestClass::test_method - Error message"
327        let rest = line.strip_prefix("FAILED ").unwrap_or(line);
328
329        let (test_path, message) = if let Some(idx) = rest.find(" - ") {
330            (&rest[..idx], rest[idx + 3..].to_string())
331        } else {
332            (rest, String::new())
333        };
334
335        // Parse test path (file::class::method or file::method)
336        let parts: Vec<&str> = test_path.split("::").collect();
337        let (file, name) = if parts.len() >= 2 {
338            (
339                Some(parts[0].to_string()),
340                parts.last().unwrap_or(&"").to_string(),
341            )
342        } else {
343            (None, test_path.to_string())
344        };
345
346        TestFailure {
347            name,
348            file,
349            line: None,
350            message,
351            criticality: Criticality::High, // Default, will be updated by match_weighted_tests
352        }
353    }
354
355    /// Calculate V_log (Logic Energy) from test results and behavioral contract
356    /// Uses weighted tests from the contract to determine criticality
357    pub fn calculate_v_log(&self, results: &TestResults, contract: &BehavioralContract) -> f32 {
358        let gamma = contract.gamma(); // Default 2.0
359        let mut v_log = 0.0;
360
361        for failure in &results.failures {
362            // Find matching weighted test from contract
363            let weight = contract
364                .weighted_tests
365                .iter()
366                .find(|wt| {
367                    failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
368                })
369                .map(|wt| wt.criticality.weight())
370                .unwrap_or(Criticality::High.weight()); // Default to High if no match
371
372            v_log += gamma * weight;
373        }
374
375        v_log
376    }
377
378    /// Match test failures with weighted tests from contract to set criticality
379    pub fn match_weighted_tests(&self, results: &mut TestResults, contract: &BehavioralContract) {
380        for failure in &mut results.failures {
381            if let Some(wt) = contract.weighted_tests.iter().find(|wt| {
382                failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
383            }) {
384                failure.criticality = wt.criticality;
385            }
386        }
387    }
388}
389
// Re-export PythonTestRunner as TestRunner for now.
// In future phases a generic `TestRunner` trait will replace this alias to
// support other languages (Rust, JavaScript, etc. — see module docs).
pub type TestRunner = PythonTestRunner;
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::WeightedTest;

    /// A summary line with all three categories should populate the
    /// per-category counts and the derived total.
    #[test]
    fn test_parse_pytest_summary() {
        let runner = PythonTestRunner::new(PathBuf::from("."));
        let parsed =
            runner.parse_pytest_output("===== 3 passed, 2 failed, 1 skipped in 0.12s =====", 120);

        assert_eq!(
            (parsed.passed, parsed.failed, parsed.skipped, parsed.total),
            (3, 2, 1, 6)
        );
    }

    /// A FAILED line should yield the bare test name, the source file,
    /// and the trailing error message.
    #[test]
    fn test_parse_pytest_failure_line() {
        let runner = PythonTestRunner::new(PathBuf::from("."));
        let parsed = runner.parse_failure_line(
            "FAILED test_calculator.py::TestDivide::test_divide_by_zero - ZeroDivisionError",
        );

        assert_eq!(parsed.name, "test_divide_by_zero");
        assert_eq!(parsed.file.as_deref(), Some("test_calculator.py"));
        assert!(parsed.message.contains("ZeroDivisionError"));
    }

    /// A single Critical failure matched against the contract should
    /// contribute gamma (2.0) * Critical weight (10.0) = 20.0.
    #[test]
    fn test_calculate_v_log() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let critical_failure = TestFailure {
            name: "test_critical_feature".to_string(),
            file: None,
            line: None,
            message: String::new(),
            criticality: Criticality::Critical,
        };
        let results = TestResults {
            failures: vec![critical_failure],
            ..Default::default()
        };

        let mut contract = BehavioralContract::new();
        contract.weighted_tests = vec![WeightedTest {
            test_name: "test_critical_feature".to_string(),
            criticality: Criticality::Critical,
        }];

        let v_log = runner.calculate_v_log(&results, &contract);
        assert!((v_log - 20.0).abs() < 0.01);
    }
}