Skip to main content

verificar/oracle/
mod.rs

1//! Verification oracle for transpilation correctness
2//!
3//! The oracle executes source and target code and compares outputs
4//! to verify transpilation correctness.
5//!
6//! # Verification Strategy
7//!
8//! From spec Section 5.1:
9//! 1. **Fast path (I/O oracle)**: Execute source & target, diff outputs - catches 95%+ of bugs
10//! 2. **Slow path (SMT/Z3)**: For critical paths (security-sensitive, memory ops)
11//! 3. **Property proofs**: Encode transpilation invariants as SMT constraints
12//!
13//! # Example
14//!
15//! ```rust,ignore
16//! use verificar::oracle::{IoOracle, DiffOptions};
17//!
18//! let oracle = IoOracle::new();
19//! let result = oracle.verify_python("print(1+1)", "2\n")?;
20//! assert!(result.matches);
21//! ```
22
23mod diff;
24mod executor;
25mod sandbox;
26mod semantic;
27
28pub use diff::{diff_results, format_diff, DiffOptions, DiffResult, Difference, DifferenceKind};
29pub use executor::{executor_for, Executor, PythonExecutor, RustExecutor};
30pub use sandbox::{SandboxConfig, SandboxedPythonExecutor};
31pub use semantic::{
32    AstSemanticOracle, CombinedSemanticOracle, Complexity, DifferenceCategory, DifferenceDetails,
33    FormalVerificationOracle, HeapAllocation, MemoryLayout, PerformanceProfile, SemanticDifference,
34    SemanticNode, SemanticOracle, SemanticVerdict,
35};
36
37use crate::{Language, Result};
38
/// Captured outcome of executing one piece of code.
///
/// The `Default` value has empty output streams, exit code `0` and a zero
/// duration, so a baseline can be built with struct-update syntax, e.g.
/// `ExecutionResult { stdout: "2\n".to_string(), ..Default::default() }`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExecutionResult {
    /// Everything the program wrote to standard output
    pub stdout: String,
    /// Everything the program wrote to standard error
    pub stderr: String,
    /// Exit code of the program
    pub exit_code: i32,
    /// Execution time in milliseconds
    pub duration_ms: u64,
}
51
52/// Verdict from verification
53#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
54pub enum Verdict {
55    /// I/O equivalent - transpilation is correct
56    Pass,
57    /// Output mismatch - transpilation bug detected
58    OutputMismatch {
59        /// Expected output (from source)
60        expected: String,
61        /// Actual output (from target)
62        actual: String,
63    },
64    /// Timeout during execution
65    Timeout {
66        /// Which phase timed out
67        phase: Phase,
68        /// Timeout limit in milliseconds
69        limit_ms: u64,
70    },
71    /// Runtime error
72    RuntimeError {
73        /// Phase where error occurred
74        phase: Phase,
75        /// Error message
76        error: String,
77    },
78}
79
80/// Verification result with full metadata
81#[derive(Debug, Clone)]
82pub struct VerificationResult {
83    /// Source code
84    pub source_code: String,
85    /// Source language
86    pub source_language: Language,
87    /// Target code
88    pub target_code: String,
89    /// Target language
90    pub target_language: Language,
91    /// Verification verdict
92    pub verdict: Verdict,
93    /// Source execution result (if available)
94    pub source_result: Option<ExecutionResult>,
95    /// Target execution result (if available)
96    pub target_result: Option<ExecutionResult>,
97}
98
99/// Phase of execution
100#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
101pub enum Phase {
102    /// Executing source code
103    Source,
104    /// Executing target code
105    Target,
106}
107
108impl std::fmt::Display for Phase {
109    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110        match self {
111            Self::Source => write!(f, "source"),
112            Self::Target => write!(f, "target"),
113        }
114    }
115}
116
117/// Verification oracle trait
118///
119/// Standardized oracle interface enables cross-transpiler comparison.
120pub trait Oracle: Send + Sync {
121    /// Execute source code and return result
122    ///
123    /// # Errors
124    ///
125    /// Returns an error if execution fails
126    fn execute_source(&self, code: &str, input: &str) -> Result<ExecutionResult>;
127
128    /// Execute target code and return result
129    ///
130    /// # Errors
131    ///
132    /// Returns an error if execution fails
133    fn execute_target(&self, code: &str, input: &str) -> Result<ExecutionResult>;
134
135    /// Compare source and target execution results
136    fn compare(&self, source: &ExecutionResult, target: &ExecutionResult) -> Verdict;
137
138    /// Get the timeout for execution in milliseconds
139    fn timeout_ms(&self) -> u64 {
140        5000 // 5 second default
141    }
142}
143
144/// Default I/O-based verification oracle
145pub struct IoOracle {
146    timeout_ms: u64,
147    diff_options: DiffOptions,
148    source_executor: Box<dyn Executor>,
149    target_executor: Option<Box<dyn Executor>>,
150}
151
152impl std::fmt::Debug for IoOracle {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        f.debug_struct("IoOracle")
155            .field("timeout_ms", &self.timeout_ms)
156            .field("diff_options", &self.diff_options)
157            .field(
158                "source_executor",
159                &format!("<{}>", self.source_executor.language()),
160            )
161            .field(
162                "target_executor",
163                &self
164                    .target_executor
165                    .as_ref()
166                    .map(|e| format!("<{}>", e.language())),
167            )
168            .finish()
169    }
170}
171
172impl Default for IoOracle {
173    fn default() -> Self {
174        Self::new()
175    }
176}
177
178impl IoOracle {
179    /// Create a new I/O oracle with default settings (Python source)
180    #[must_use]
181    pub fn new() -> Self {
182        Self {
183            timeout_ms: 5000,
184            diff_options: DiffOptions::default(),
185            source_executor: Box::new(PythonExecutor::new()),
186            target_executor: None,
187        }
188    }
189
190    /// Create an I/O oracle with custom timeout
191    #[must_use]
192    pub fn with_timeout(mut self, timeout_ms: u64) -> Self {
193        self.timeout_ms = timeout_ms;
194        self
195    }
196
197    /// Set diff options for comparison
198    #[must_use]
199    pub fn with_diff_options(mut self, options: DiffOptions) -> Self {
200        self.diff_options = options;
201        self
202    }
203
204    /// Set the source language executor
205    #[must_use]
206    pub fn with_source_executor(mut self, executor: Box<dyn Executor>) -> Self {
207        self.source_executor = executor;
208        self
209    }
210
211    /// Set the target language executor
212    #[must_use]
213    pub fn with_target_executor(mut self, executor: Box<dyn Executor>) -> Self {
214        self.target_executor = Some(executor);
215        self
216    }
217
218    /// Verify transpilation correctness
219    ///
220    /// # Errors
221    ///
222    /// Returns an error if verification fails
223    pub fn verify(
224        &self,
225        source_code: &str,
226        target_code: &str,
227        input: &str,
228        source_lang: Language,
229        target_lang: Language,
230    ) -> Result<VerificationResult> {
231        let source_result = self.execute_source(source_code, input)?;
232
233        let target_result = if let Some(ref target_exec) = self.target_executor {
234            target_exec.execute(target_code, input, self.timeout_ms)?
235        } else {
236            self.execute_target(target_code, input)?
237        };
238
239        let verdict = self.compare(&source_result, &target_result);
240
241        Ok(VerificationResult {
242            source_code: source_code.to_string(),
243            source_language: source_lang,
244            target_code: target_code.to_string(),
245            target_language: target_lang,
246            verdict,
247            source_result: Some(source_result),
248            target_result: Some(target_result),
249        })
250    }
251
252    /// Execute Python code and verify against expected output
253    ///
254    /// # Errors
255    ///
256    /// Returns an error if execution fails
257    pub fn verify_python(&self, code: &str, expected_output: &str) -> Result<DiffResult> {
258        let result = self.source_executor.execute(code, "", self.timeout_ms)?;
259
260        let expected = ExecutionResult {
261            stdout: expected_output.to_string(),
262            stderr: String::new(),
263            exit_code: 0,
264            duration_ms: 0,
265        };
266
267        Ok(diff_results(&expected, &result, &self.diff_options))
268    }
269
270    /// Execute Python code and verify against expected output with input
271    ///
272    /// # Errors
273    ///
274    /// Returns an error if execution fails
275    pub fn verify_python_with_input(
276        &self,
277        code: &str,
278        input: &str,
279        expected_output: &str,
280    ) -> Result<DiffResult> {
281        let result = self.source_executor.execute(code, input, self.timeout_ms)?;
282
283        let expected = ExecutionResult {
284            stdout: expected_output.to_string(),
285            stderr: String::new(),
286            exit_code: 0,
287            duration_ms: 0,
288        };
289
290        Ok(diff_results(&expected, &result, &self.diff_options))
291    }
292
293    /// Execute code and return raw result
294    ///
295    /// # Errors
296    ///
297    /// Returns an error if execution fails
298    pub fn execute_python(&self, code: &str, input: &str) -> Result<ExecutionResult> {
299        self.source_executor.execute(code, input, self.timeout_ms)
300    }
301
302    /// Get diff options
303    #[must_use]
304    pub fn diff_options(&self) -> &DiffOptions {
305        &self.diff_options
306    }
307}
308
309impl Oracle for IoOracle {
310    fn execute_source(&self, code: &str, input: &str) -> Result<ExecutionResult> {
311        self.source_executor.execute(code, input, self.timeout_ms)
312    }
313
314    fn execute_target(&self, code: &str, input: &str) -> Result<ExecutionResult> {
315        if let Some(ref executor) = self.target_executor {
316            executor.execute(code, input, self.timeout_ms)
317        } else {
318            // Default: use same executor as source (for same-language testing)
319            self.source_executor.execute(code, input, self.timeout_ms)
320        }
321    }
322
323    fn compare(&self, source: &ExecutionResult, target: &ExecutionResult) -> Verdict {
324        let diff = diff_results(source, target, &self.diff_options);
325
326        if diff.matches {
327            Verdict::Pass
328        } else {
329            Verdict::OutputMismatch {
330                expected: source.stdout.clone(),
331                actual: target.stdout.clone(),
332            }
333        }
334    }
335
336    fn timeout_ms(&self) -> u64 {
337        self.timeout_ms
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a successful [`ExecutionResult`] (empty stderr, exit code 0).
    fn ok_result(stdout: &str, duration_ms: u64) -> ExecutionResult {
        ExecutionResult {
            stdout: stdout.to_string(),
            stderr: String::new(),
            exit_code: 0,
            duration_ms,
        }
    }

    /// Builds a Python-to-Rust [`VerificationResult`] without execution results.
    fn sample_verification() -> VerificationResult {
        VerificationResult {
            source_code: "print(1)".to_string(),
            source_language: Language::Python,
            target_code: "fn main() {}".to_string(),
            target_language: Language::Rust,
            verdict: Verdict::Pass,
            source_result: None,
            target_result: None,
        }
    }

    /// Reports whether a Python interpreter is usable, logging a skip notice
    /// when it is not so that skipped tests are visible in the output.
    fn python_available() -> bool {
        let available = PythonExecutor::new().is_available();
        if !available {
            eprintln!("Python not available, skipping test");
        }
        available
    }

    #[test]
    fn test_verdict_pass() {
        let oracle = IoOracle::new();
        let source = ok_result("hello", 10);
        let target = ok_result("hello", 5);

        let verdict = oracle.compare(&source, &target);
        assert_eq!(verdict, Verdict::Pass);
    }

    #[test]
    fn test_verdict_mismatch() {
        let oracle = IoOracle::new();
        let source = ok_result("hello", 10);
        let target = ok_result("world", 5);

        let verdict = oracle.compare(&source, &target);
        assert!(matches!(verdict, Verdict::OutputMismatch { .. }));
    }

    #[test]
    fn test_phase_display() {
        assert_eq!(Phase::Source.to_string(), "source");
        assert_eq!(Phase::Target.to_string(), "target");
    }

    #[test]
    fn test_io_oracle_verify_python() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new();
        let result = oracle
            .verify_python("print('hello')", "hello\n")
            .expect("verification should succeed");

        assert!(result.matches);
    }

    #[test]
    fn test_io_oracle_verify_python_mismatch() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new();
        let result = oracle
            .verify_python("print('hello')", "world\n")
            .expect("verification should succeed");

        assert!(!result.matches);
    }

    #[test]
    fn test_io_oracle_execute_python() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new();
        let result = oracle
            .execute_python("print(2 + 2)", "")
            .expect("execution should succeed");

        assert_eq!(result.stdout.trim(), "4");
        assert_eq!(result.exit_code, 0);
    }

    #[test]
    fn test_io_oracle_with_diff_options() {
        let oracle = IoOracle::new()
            .with_diff_options(DiffOptions::lenient())
            .with_timeout(10000);

        assert_eq!(oracle.timeout_ms(), 10000);
        assert!(oracle.diff_options().normalize_whitespace);
    }

    #[test]
    fn test_io_oracle_same_code_verification() {
        if !python_available() {
            return;
        }

        // Verify same code produces same output
        let oracle = IoOracle::new();
        let code = "x = 5\nprint(x * 2)";
        let result = oracle
            .verify(code, code, "", Language::Python, Language::Python)
            .expect("verification should succeed");

        assert_eq!(result.verdict, Verdict::Pass);
    }

    #[test]
    fn test_io_oracle_default() {
        let oracle = IoOracle::default();
        assert_eq!(oracle.timeout_ms(), 5000);
    }

    #[test]
    fn test_io_oracle_debug() {
        let oracle = IoOracle::new();
        let debug = format!("{:?}", oracle);
        assert!(debug.contains("IoOracle"));
        assert!(debug.contains("timeout_ms"));
    }

    #[test]
    fn test_io_oracle_with_source_executor() {
        let oracle = IoOracle::new().with_source_executor(Box::new(PythonExecutor::new()));
        assert_eq!(oracle.source_executor.language(), Language::Python);
    }

    #[test]
    fn test_io_oracle_with_target_executor() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new().with_target_executor(Box::new(PythonExecutor::new()));
        assert!(oracle.target_executor.is_some());

        // Test execute_target uses target executor
        let result = oracle.execute_target("print(1)", "");
        assert!(result.is_ok());
    }

    #[test]
    fn test_io_oracle_verify_python_with_input() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new();
        let code = "x = input()\nprint(f'Hello {x}')";
        let result = oracle
            .verify_python_with_input(code, "World", "Hello World\n")
            .expect("verification should succeed");

        assert!(result.matches);
    }

    #[test]
    fn test_io_oracle_verify_with_target_executor() {
        if !python_available() {
            return;
        }

        let oracle = IoOracle::new().with_target_executor(Box::new(PythonExecutor::new()));

        let code = "print(42)";
        let result = oracle
            .verify(code, code, "", Language::Python, Language::Python)
            .expect("verification should succeed");

        assert_eq!(result.verdict, Verdict::Pass);
    }

    #[test]
    fn test_execution_result_debug() {
        let result = ok_result("test", 100);
        let debug = format!("{:?}", result);
        assert!(debug.contains("ExecutionResult"));
    }

    #[test]
    fn test_execution_result_clone() {
        let result = ExecutionResult {
            stdout: "test".to_string(),
            stderr: "err".to_string(),
            exit_code: 1,
            duration_ms: 100,
        };
        let cloned = result.clone();
        assert_eq!(cloned.stdout, result.stdout);
        assert_eq!(cloned.exit_code, result.exit_code);
    }

    #[test]
    fn test_verdict_debug() {
        let verdict = Verdict::Pass;
        let debug = format!("{:?}", verdict);
        assert!(debug.contains("Pass"));

        let mismatch = Verdict::OutputMismatch {
            expected: "a".to_string(),
            actual: "b".to_string(),
        };
        let debug2 = format!("{:?}", mismatch);
        assert!(debug2.contains("OutputMismatch"));

        let timeout = Verdict::Timeout {
            phase: Phase::Source,
            limit_ms: 5000,
        };
        let debug3 = format!("{:?}", timeout);
        assert!(debug3.contains("Timeout"));

        let error = Verdict::RuntimeError {
            phase: Phase::Target,
            error: "error".to_string(),
        };
        let debug4 = format!("{:?}", error);
        assert!(debug4.contains("RuntimeError"));
    }

    #[test]
    fn test_verdict_clone() {
        let verdict = Verdict::OutputMismatch {
            expected: "a".to_string(),
            actual: "b".to_string(),
        };
        let cloned = verdict.clone();
        assert_eq!(cloned, verdict);
    }

    #[test]
    fn test_verification_result_debug() {
        let result = sample_verification();
        let debug = format!("{:?}", result);
        assert!(debug.contains("VerificationResult"));
    }

    #[test]
    fn test_verification_result_clone() {
        let result = sample_verification();
        let cloned = result.clone();
        assert_eq!(cloned.source_code, result.source_code);
    }

    #[test]
    fn test_phase_debug() {
        let phase = Phase::Source;
        let debug = format!("{:?}", phase);
        assert!(debug.contains("Source"));
    }

    #[test]
    fn test_phase_clone() {
        let phase = Phase::Target;
        // Call `Clone` explicitly: `phase.clone()` on a `Copy` type trips
        // `clippy::clone_on_copy`, but the `Clone` impl is what is under test.
        let cloned = Clone::clone(&phase);
        assert_eq!(cloned, phase);
    }

    #[test]
    fn test_phase_copy() {
        let phase = Phase::Source;
        let copied = phase;
        assert_eq!(copied, Phase::Source);
        // Using the original binding after the assignment is what proves `Copy`.
        assert_eq!(phase, Phase::Source);
    }
}