Skip to main content

codetether_agent/rlm/oracle/validator/
mod.rs

1//! Main trace validator entry point.
2//!
3//! Orchestrates validation of RLM analysis results by:
4//! 1. Classifying the query type (pattern-match vs structural vs semantic)
5//! 2. Routing to the appropriate oracle
6//! 3. Marking traces as "golden" (verified), "unverified", or "failed"
7
8use std::time::Instant;
9
10use super::grep_oracle::GrepOracle;
11use super::schema::FinalPayload;
12use crate::rlm::repl::RlmAnalysisResult;
13
14pub use super::ast_validation::validate_ast_payload;
15pub use super::batch::{BatchValidationStats, SplitWriteStats};
16pub use super::consensus::{build_base_trace, validate_with_consensus};
17#[allow(unused_imports)]
18pub use super::consensus_helpers::build_placeholder_trace;
19pub use super::grep_validation::validate_grep_payload;
20#[allow(unused_imports)]
21pub use super::record::OracleTraceRecord;
22pub use super::trace_types::{OracleResult, ValidatedTrace};
23pub use super::types::{TraceStep, VerificationMethod};
24
/// Tunable thresholds used by the trace validator.
///
/// Both values are ratios expected to lie in `0.0..=1.0`.
#[derive(Clone, Debug)]
pub struct Config {
    /// Minimum coverage ratio a trace must reach to be classified as golden.
    pub confidence_threshold: f32,
    /// Agreement ratio required by semantic consensus checks.
    pub consensus_threshold: f32,
}
33
34impl Default for Config {
35    fn default() -> Self {
36        Self {
37            confidence_threshold: 0.95,
38            consensus_threshold: 1.0,
39        }
40    }
41}
42
43/// Main trace validator for RLM REPL outputs.
44#[derive(Debug, Clone)]
45pub struct TraceValidator {
46    config: Config,
47}
48
49impl Default for TraceValidator {
50    fn default() -> Self {
51        Self::new()
52    }
53}
54
55impl TraceValidator {
56    /// Create a new trace validator with default configuration.
57    pub fn new() -> Self {
58        Self {
59            config: Config::default(),
60        }
61    }
62
63    /// Set the confidence threshold for golden classification.
64    pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
65        self.config.confidence_threshold = threshold.clamp(0.0, 1.0);
66        self
67    }
68
69    /// Set consensus threshold for semantic query verification.
70    pub fn with_consensus_threshold(mut self, threshold: f32) -> Self {
71        self.config.consensus_threshold = threshold.clamp(0.0, 1.0);
72        self
73    }
74
75    /// Validate an RLM analysis result against source code.
76    pub fn validate(
77        &self,
78        result: &RlmAnalysisResult,
79        source: &str,
80        source_path: Option<&str>,
81        repo_revision: Option<&str>,
82        trace_steps: Option<Vec<TraceStep>>,
83    ) -> OracleResult {
84        let _start = Instant::now();
85
86        let final_payload = FinalPayload::parse(&result.answer);
87        let query = result
88            .sub_queries
89            .first()
90            .map(|sq| sq.query.clone())
91            .unwrap_or_else(|| "unknown query".to_string());
92
93        let base_trace = || {
94            build_base_trace(
95                result,
96                source_path,
97                repo_revision,
98                trace_steps.clone(),
99                final_payload.clone(),
100            )
101        };
102
103        match &final_payload {
104            FinalPayload::Grep(_) => validate_grep_payload(
105                &final_payload,
106                source,
107                self.config.confidence_threshold,
108                base_trace,
109            ),
110            FinalPayload::Ast(_) => validate_ast_payload(
111                &final_payload,
112                source,
113                self.config.confidence_threshold,
114                base_trace,
115            ),
116            FinalPayload::Semantic(_) => {
117                let mut trace = base_trace();
118                trace.verdict = "unverified".to_string();
119                OracleResult::Unverified {
120                    reason: "Semantic queries require LLM understanding - no deterministic oracle available".to_string(),
121                    trace,
122                }
123            }
124            FinalPayload::Malformed { error, raw } => {
125                let trimmed = raw.trim_start();
126                if trimmed.starts_with('{') || trimmed.starts_with('[') {
127                    let mut trace = base_trace();
128                    trace.verdict = "failed".to_string();
129                    OracleResult::Failed {
130                        reason: format!("Malformed FINAL payload: {}", error),
131                        diff: None,
132                        trace,
133                    }
134                } else {
135                    self.validate_plain_text(&query, source, &result.answer, base_trace)
136                }
137            }
138        }
139    }
140
141    fn validate_plain_text(
142        &self,
143        query: &str,
144        source: &str,
145        answer: &str,
146        base_trace: impl FnOnce() -> ValidatedTrace,
147    ) -> OracleResult {
148        match GrepOracle::classify_query(query) {
149            super::QueryType::PatternMatch => {
150                let oracle = GrepOracle::new(source.to_string());
151                let verification = oracle.verify(answer, query);
152                self.oracle_result_from_grep_verification(verification, base_trace)
153            }
154            super::QueryType::Structural => {
155                let mut trace = base_trace();
156                trace.verdict = "unverified".to_string();
157                OracleResult::Unverified {
158                    reason: "Structured query result was not emitted as FINAL(JSON)".to_string(),
159                    trace,
160                }
161            }
162            super::QueryType::Semantic => {
163                let mut trace = base_trace();
164                trace.verdict = "unverified".to_string();
165                OracleResult::Unverified {
166                    reason: "Semantic query - no deterministic oracle available".to_string(),
167                    trace,
168                }
169            }
170        }
171    }
172
173    fn oracle_result_from_grep_verification(
174        &self,
175        verification: super::grep_oracle::GrepVerification,
176        base_trace: impl FnOnce() -> ValidatedTrace,
177    ) -> OracleResult {
178        use super::grep_oracle::GrepVerification;
179
180        match verification {
181            GrepVerification::ExactMatch | GrepVerification::UnorderedMatch => {
182                let mut trace = base_trace();
183                trace.verification_method = VerificationMethod::GrepOracle;
184                trace.verdict = "golden".to_string();
185                OracleResult::Golden(trace)
186            }
187            GrepVerification::CannotVerify { reason } => {
188                let mut trace = base_trace();
189                trace.verdict = "unverified".to_string();
190                OracleResult::Unverified { reason, trace }
191            }
192            _ => {
193                let diff = format!("Grep verification failed: {:?}", verification);
194                let mut trace = base_trace();
195                trace.verification_method = VerificationMethod::GrepOracle;
196                trace.verdict = "failed".to_string();
197                trace.oracle_diff = Some(diff.clone());
198                OracleResult::Failed {
199                    reason: diff.clone(),
200                    diff: Some(diff),
201                    trace,
202                }
203            }
204        }
205    }
206
207    /// Validate multiple semantic runs using strict consensus.
208    pub fn validate_with_consensus(
209        &self,
210        results: &[RlmAnalysisResult],
211        _source: &str,
212        source_path: Option<&str>,
213        repo_revision: Option<&str>,
214        trace_steps: Option<Vec<TraceStep>>,
215    ) -> OracleResult {
216        validate_with_consensus(
217            results,
218            source_path,
219            repo_revision,
220            trace_steps,
221            self.config.consensus_threshold,
222        )
223    }
224
225    /// Batch validate multiple traces and return statistics.
226    pub fn batch_validate<'a>(
227        &self,
228        traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
229    ) -> BatchValidationStats {
230        self.batch_validate_with_options(traces, None, None)
231    }
232
233    /// Batch validate with additional options.
234    pub fn batch_validate_with_options<'a>(
235        &self,
236        traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
237        repo_revision: Option<&str>,
238        trace_steps: Option<Vec<TraceStep>>,
239    ) -> BatchValidationStats {
240        let mut stats = BatchValidationStats::default();
241
242        for (result, source, source_path) in traces {
243            match self.validate(
244                &result,
245                source,
246                source_path,
247                repo_revision,
248                trace_steps.clone(),
249            ) {
250                OracleResult::Golden(trace) => stats.golden.push(trace),
251                OracleResult::Consensus { trace, .. } => stats.consensus.push(trace),
252                OracleResult::Unverified { reason, trace } => {
253                    stats.unverified.push((trace, reason));
254                }
255                OracleResult::Failed { reason, trace, .. } => {
256                    stats.failed.push((trace, reason));
257                }
258            }
259        }
260        stats
261    }
262}