// decy_llm/verifier.rs

//! Verification and iteration framework for LLM-generated code (DECY-100).
//!
//! Verifies that generated Rust code compiles, passes tests, and
//! iterates on failures with error feedback.
5
use crate::llm_codegen::{GeneratedCode, LlmError};
use serde::{Deserialize, Serialize};
8
9/// Result of code verification.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct VerificationResult {
12    /// Whether the code compiled successfully
13    pub compiles: bool,
14    /// Whether all tests passed
15    pub tests_pass: bool,
16    /// Compilation errors (if any)
17    pub compile_errors: Vec<String>,
18    /// Test failures (if any)
19    pub test_failures: Vec<String>,
20    /// Number of clippy warnings
21    pub clippy_warnings: usize,
22    /// Overall success
23    pub success: bool,
24}
25
26impl VerificationResult {
27    /// Create a successful result.
28    pub fn success() -> Self {
29        Self {
30            compiles: true,
31            tests_pass: true,
32            compile_errors: Vec::new(),
33            test_failures: Vec::new(),
34            clippy_warnings: 0,
35            success: true,
36        }
37    }
38
39    /// Create a compilation failure result.
40    pub fn compile_failure(errors: Vec<String>) -> Self {
41        Self {
42            compiles: false,
43            tests_pass: false,
44            compile_errors: errors,
45            test_failures: Vec::new(),
46            clippy_warnings: 0,
47            success: false,
48        }
49    }
50
51    /// Create a test failure result.
52    pub fn test_failure(failures: Vec<String>) -> Self {
53        Self {
54            compiles: true,
55            tests_pass: false,
56            compile_errors: Vec::new(),
57            test_failures: failures,
58            clippy_warnings: 0,
59            success: false,
60        }
61    }
62}
63
64/// Iteration context for retry with error feedback.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct IterationContext {
67    /// Current iteration number (1-based)
68    pub iteration: usize,
69    /// Maximum iterations allowed
70    pub max_iterations: usize,
71    /// Previous generated code
72    pub previous_code: Option<String>,
73    /// Previous errors
74    pub previous_errors: Vec<String>,
75    /// Accumulated feedback
76    pub feedback: Vec<String>,
77}
78
79impl IterationContext {
80    /// Create a new iteration context.
81    pub fn new(max_iterations: usize) -> Self {
82        Self {
83            iteration: 1,
84            max_iterations,
85            previous_code: None,
86            previous_errors: Vec::new(),
87            feedback: Vec::new(),
88        }
89    }
90
91    /// Check if more iterations are allowed.
92    pub fn can_retry(&self) -> bool {
93        self.iteration <= self.max_iterations
94    }
95
96    /// Record a failed iteration.
97    pub fn record_failure(&mut self, code: &str, errors: Vec<String>) {
98        self.previous_code = Some(code.to_string());
99        self.previous_errors = errors.clone();
100
101        // Add to feedback
102        for error in &errors {
103            self.feedback.push(format!("Iteration {}: {}", self.iteration, error));
104        }
105
106        self.iteration += 1;
107    }
108
109    /// Get formatted feedback for next iteration.
110    pub fn get_feedback(&self) -> String {
111        let mut feedback = String::new();
112
113        feedback.push_str("## Previous Errors\n\n");
114
115        for error in &self.previous_errors {
116            feedback.push_str("- ");
117            feedback.push_str(error);
118            feedback.push('\n');
119        }
120
121        if let Some(ref code) = self.previous_code {
122            feedback.push_str("\n## Previous Code\n```rust\n");
123            feedback.push_str(code);
124            feedback.push_str("\n```\n");
125        }
126
127        feedback.push_str("\n## Instructions\n");
128        feedback.push_str("Please fix the errors above and generate corrected Rust code.\n");
129
130        feedback
131    }
132}
133
/// Code verifier that compiles and tests generated Rust.
#[derive(Debug)]
pub struct CodeVerifier {
    /// Temporary directory for compilation (not used in stub).
    _temp_dir: Option<std::path::PathBuf>,
}
140
141impl CodeVerifier {
142    /// Create a new code verifier.
143    pub fn new() -> Self {
144        Self { _temp_dir: None }
145    }
146
147    /// Verify generated code by compiling it.
148    ///
149    /// Note: This is a stub for research purposes. Full implementation
150    /// would create a temporary project and run cargo build.
151    pub fn verify(&self, code: &GeneratedCode) -> Result<VerificationResult, LlmError> {
152        // Basic validation - check if code looks valid
153        if code.code.trim().is_empty() {
154            return Ok(VerificationResult::compile_failure(vec!["Empty code".to_string()]));
155        }
156
157        // Check for balanced braces (basic syntax check)
158        let open = code.code.matches('{').count();
159        let close = code.code.matches('}').count();
160
161        if open != close {
162            return Ok(VerificationResult::compile_failure(vec![format!(
163                "Unbalanced braces: {} open, {} close",
164                open, close
165            )]));
166        }
167
168        // For research purposes, assume valid-looking code compiles
169        Ok(VerificationResult::success())
170    }
171
172    /// Try to compile Rust code.
173    ///
174    /// Stub implementation - full version would use cargo.
175    pub fn compile(&self, code: &str) -> Result<(), Vec<String>> {
176        if code.trim().is_empty() {
177            return Err(vec!["Empty code".to_string()]);
178        }
179
180        // Basic syntax checks
181        let open_braces = code.matches('{').count();
182        let close_braces = code.matches('}').count();
183
184        if open_braces != close_braces {
185            return Err(vec![format!(
186                "Unbalanced braces: {} open, {} close",
187                open_braces, close_braces
188            )]);
189        }
190
191        Ok(())
192    }
193
194    /// Run clippy on the code.
195    ///
196    /// Stub implementation - returns 0 warnings for valid code.
197    pub fn lint(&self, code: &str) -> Result<usize, LlmError> {
198        // Basic check - count potential issues
199        let mut warnings = 0;
200
201        // Check for common issues
202        if code.contains("unwrap()") {
203            warnings += 1;
204        }
205        if code.contains("expect(") {
206            warnings += 1;
207        }
208        if code.contains("panic!") {
209            warnings += 1;
210        }
211
212        Ok(warnings)
213    }
214
215    /// Run tests if present.
216    ///
217    /// Stub implementation.
218    pub fn run_tests(&self, code: &str) -> Result<(), Vec<String>> {
219        // Check if there are test functions
220        if code.contains("#[test]") {
221            // Assume tests pass for valid code in stub
222            Ok(())
223        } else {
224            // No tests to run
225            Ok(())
226        }
227    }
228
229    /// Create a temporary project for compilation.
230    ///
231    /// Stub implementation.
232    fn _create_temp_project(&self, _code: &str) -> Result<std::path::PathBuf, LlmError> {
233        Err(LlmError::ApiError("Temporary project creation not implemented".to_string()))
234    }
235}
236
237impl Default for CodeVerifier {
238    fn default() -> Self {
239        Self::new()
240    }
241}
242
/// Verification-iteration loop runner.
#[derive(Debug)]
pub struct VerificationLoop {
    /// Maximum iterations.
    max_iterations: usize,
}
249
250impl VerificationLoop {
251    /// Create a new verification loop.
252    pub fn new(max_iterations: usize) -> Self {
253        Self { max_iterations }
254    }
255
256    /// Get max iterations.
257    pub fn max_iterations(&self) -> usize {
258        self.max_iterations
259    }
260
261    /// Check if a result indicates success.
262    pub fn is_success(&self, result: &VerificationResult) -> bool {
263        result.success && result.compiles && result.tests_pass
264    }
265
266    /// Format errors for feedback.
267    pub fn format_feedback(&self, result: &VerificationResult) -> String {
268        let mut feedback = String::new();
269
270        if !result.compile_errors.is_empty() {
271            feedback.push_str("## Compilation Errors\n\n");
272            for error in &result.compile_errors {
273                feedback.push_str("- ");
274                feedback.push_str(error);
275                feedback.push('\n');
276            }
277        }
278
279        if !result.test_failures.is_empty() {
280            feedback.push_str("\n## Test Failures\n\n");
281            for failure in &result.test_failures {
282                feedback.push_str("- ");
283                feedback.push_str(failure);
284                feedback.push('\n');
285            }
286        }
287
288        if result.clippy_warnings > 0 {
289            feedback.push_str(&format!("\n## Clippy Warnings: {}\n", result.clippy_warnings));
290        }
291
292        feedback
293    }
294}
295
296impl Default for VerificationLoop {
297    fn default() -> Self {
298        Self::new(3)
299    }
300}
301
// ============================================================================
// DECY-ML-004: "COMPILES ON FIRST TRY" METRIC
// ============================================================================
305
306/// Metrics for tracking "compiles on first try" rate.
307///
308/// DECY-ML-004: Key quality metric from Oracle Acceleration Pipeline.
309/// Target: 85%+ code compiles successfully on first attempt.
310///
311/// # Example
312///
313/// ```
314/// use decy_llm::CompilationMetrics;
315///
316/// let mut metrics = CompilationMetrics::new();
317///
318/// // Record successful first-try compilation
319/// metrics.record_attempt(true, 1);
320///
321/// // Record success after retry
322/// metrics.record_attempt(true, 2);
323///
324/// // Check if meeting target
325/// if metrics.meets_target(0.85) {
326///     println!("Meeting 85% first-try target!");
327/// }
328/// ```
329#[derive(Debug, Clone, Default, Serialize, Deserialize)]
330pub struct CompilationMetrics {
331    /// Total number of compilation attempts
332    total_attempts: u64,
333    /// Number of first-try successes
334    first_try_successes: u64,
335    /// Total iterations summed (for average calculation)
336    total_iterations: u64,
337    /// Histogram of iterations needed per attempt
338    iteration_counts: std::collections::HashMap<usize, u64>,
339}
340
341impl CompilationMetrics {
342    /// Target "compiles on first try" rate (85%).
343    pub const TARGET_RATE: f64 = 0.85;
344
345    /// Create a new metrics tracker.
346    pub fn new() -> Self {
347        Self {
348            total_attempts: 0,
349            first_try_successes: 0,
350            total_iterations: 0,
351            iteration_counts: std::collections::HashMap::new(),
352        }
353    }
354
355    /// Record a compilation attempt.
356    ///
357    /// # Arguments
358    /// * `success` - Whether compilation ultimately succeeded
359    /// * `iterations` - Number of iterations needed (1 = first try)
360    pub fn record_attempt(&mut self, success: bool, iterations: usize) {
361        self.total_attempts += 1;
362        self.total_iterations += iterations as u64;
363
364        // Track first-try successes
365        if success && iterations == 1 {
366            self.first_try_successes += 1;
367        }
368
369        // Update histogram
370        *self.iteration_counts.entry(iterations).or_insert(0) += 1;
371    }
372
373    /// Get total number of compilation attempts.
374    pub fn total_attempts(&self) -> u64 {
375        self.total_attempts
376    }
377
378    /// Get number of first-try successes.
379    pub fn first_try_successes(&self) -> u64 {
380        self.first_try_successes
381    }
382
383    /// Calculate "compiles on first try" rate (0.0 - 1.0).
384    pub fn first_try_rate(&self) -> f64 {
385        if self.total_attempts == 0 {
386            return 0.0;
387        }
388        self.first_try_successes as f64 / self.total_attempts as f64
389    }
390
391    /// Check if current rate meets or exceeds target.
392    pub fn meets_target(&self, target: f64) -> bool {
393        self.first_try_rate() >= target
394    }
395
396    /// Calculate average iterations needed per attempt.
397    pub fn average_iterations(&self) -> f64 {
398        if self.total_attempts == 0 {
399            return 0.0;
400        }
401        self.total_iterations as f64 / self.total_attempts as f64
402    }
403
404    /// Get iteration histogram (iterations → count).
405    pub fn iteration_histogram(&self) -> &std::collections::HashMap<usize, u64> {
406        &self.iteration_counts
407    }
408
409    /// Reset all metrics to zero.
410    pub fn reset(&mut self) {
411        self.total_attempts = 0;
412        self.first_try_successes = 0;
413        self.total_iterations = 0;
414        self.iteration_counts.clear();
415    }
416}