codetether_agent/rlm/oracle/validator/
mod.rs1use std::time::Instant;
9
10use super::grep_oracle::GrepOracle;
11use super::schema::FinalPayload;
12use crate::rlm::repl::RlmAnalysisResult;
13
14pub use super::ast_validation::validate_ast_payload;
15pub use super::batch::{BatchValidationStats, SplitWriteStats};
16pub use super::consensus::{build_base_trace, validate_with_consensus};
17#[allow(unused_imports)]
18pub use super::consensus_helpers::build_placeholder_trace;
19pub use super::grep_validation::validate_grep_payload;
20#[allow(unused_imports)]
21pub use super::record::OracleTraceRecord;
22pub use super::trace_types::{OracleResult, ValidatedTrace};
23pub use super::types::{TraceStep, VerificationMethod};
24
/// Thresholds used by [`TraceValidator`].
///
/// Both values are plain `f32` fields; the `with_*_threshold` builder methods
/// on [`TraceValidator`] clamp them to `0.0..=1.0` before storing.
#[derive(Debug, Clone, PartialEq)]
pub struct Config {
    /// Threshold forwarded to the grep and AST payload validators.
    pub confidence_threshold: f32,
    /// Threshold forwarded to consensus validation.
    pub consensus_threshold: f32,
}
33
34impl Default for Config {
35 fn default() -> Self {
36 Self {
37 confidence_threshold: 0.95,
38 consensus_threshold: 1.0,
39 }
40 }
41}
42
43#[derive(Debug, Clone)]
45pub struct TraceValidator {
46 config: Config,
47}
48
49impl Default for TraceValidator {
50 fn default() -> Self {
51 Self::new()
52 }
53}
54
55impl TraceValidator {
56 pub fn new() -> Self {
58 Self {
59 config: Config::default(),
60 }
61 }
62
63 pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
65 self.config.confidence_threshold = threshold.clamp(0.0, 1.0);
66 self
67 }
68
69 pub fn with_consensus_threshold(mut self, threshold: f32) -> Self {
71 self.config.consensus_threshold = threshold.clamp(0.0, 1.0);
72 self
73 }
74
75 pub fn validate(
77 &self,
78 result: &RlmAnalysisResult,
79 source: &str,
80 source_path: Option<&str>,
81 repo_revision: Option<&str>,
82 trace_steps: Option<Vec<TraceStep>>,
83 ) -> OracleResult {
84 let _start = Instant::now();
85
86 let final_payload = FinalPayload::parse(&result.answer);
87 let query = result
88 .sub_queries
89 .first()
90 .map(|sq| sq.query.clone())
91 .unwrap_or_else(|| "unknown query".to_string());
92
93 let base_trace = || {
94 build_base_trace(
95 result,
96 source_path,
97 repo_revision,
98 trace_steps.clone(),
99 final_payload.clone(),
100 )
101 };
102
103 match &final_payload {
104 FinalPayload::Grep(_) => validate_grep_payload(
105 &final_payload,
106 source,
107 self.config.confidence_threshold,
108 base_trace,
109 ),
110 FinalPayload::Ast(_) => validate_ast_payload(
111 &final_payload,
112 source,
113 self.config.confidence_threshold,
114 base_trace,
115 ),
116 FinalPayload::Semantic(_) => {
117 let mut trace = base_trace();
118 trace.verdict = "unverified".to_string();
119 OracleResult::Unverified {
120 reason: "Semantic queries require LLM understanding - no deterministic oracle available".to_string(),
121 trace,
122 }
123 }
124 FinalPayload::Malformed { error, raw } => {
125 let trimmed = raw.trim_start();
126 if trimmed.starts_with('{') || trimmed.starts_with('[') {
127 let mut trace = base_trace();
128 trace.verdict = "failed".to_string();
129 OracleResult::Failed {
130 reason: format!("Malformed FINAL payload: {}", error),
131 diff: None,
132 trace,
133 }
134 } else {
135 self.validate_plain_text(&query, source, &result.answer, base_trace)
136 }
137 }
138 }
139 }
140
141 fn validate_plain_text(
142 &self,
143 query: &str,
144 source: &str,
145 answer: &str,
146 base_trace: impl FnOnce() -> ValidatedTrace,
147 ) -> OracleResult {
148 match GrepOracle::classify_query(query) {
149 super::QueryType::PatternMatch => {
150 let oracle = GrepOracle::new(source.to_string());
151 let verification = oracle.verify(answer, query);
152 self.oracle_result_from_grep_verification(verification, base_trace)
153 }
154 super::QueryType::Structural => {
155 let mut trace = base_trace();
156 trace.verdict = "unverified".to_string();
157 OracleResult::Unverified {
158 reason: "Structured query result was not emitted as FINAL(JSON)".to_string(),
159 trace,
160 }
161 }
162 super::QueryType::Semantic => {
163 let mut trace = base_trace();
164 trace.verdict = "unverified".to_string();
165 OracleResult::Unverified {
166 reason: "Semantic query - no deterministic oracle available".to_string(),
167 trace,
168 }
169 }
170 }
171 }
172
173 fn oracle_result_from_grep_verification(
174 &self,
175 verification: super::grep_oracle::GrepVerification,
176 base_trace: impl FnOnce() -> ValidatedTrace,
177 ) -> OracleResult {
178 use super::grep_oracle::GrepVerification;
179
180 match verification {
181 GrepVerification::ExactMatch | GrepVerification::UnorderedMatch => {
182 let mut trace = base_trace();
183 trace.verification_method = VerificationMethod::GrepOracle;
184 trace.verdict = "golden".to_string();
185 OracleResult::Golden(trace)
186 }
187 GrepVerification::CannotVerify { reason } => {
188 let mut trace = base_trace();
189 trace.verdict = "unverified".to_string();
190 OracleResult::Unverified { reason, trace }
191 }
192 _ => {
193 let diff = format!("Grep verification failed: {:?}", verification);
194 let mut trace = base_trace();
195 trace.verification_method = VerificationMethod::GrepOracle;
196 trace.verdict = "failed".to_string();
197 trace.oracle_diff = Some(diff.clone());
198 OracleResult::Failed {
199 reason: diff.clone(),
200 diff: Some(diff),
201 trace,
202 }
203 }
204 }
205 }
206
207 pub fn validate_with_consensus(
209 &self,
210 results: &[RlmAnalysisResult],
211 _source: &str,
212 source_path: Option<&str>,
213 repo_revision: Option<&str>,
214 trace_steps: Option<Vec<TraceStep>>,
215 ) -> OracleResult {
216 validate_with_consensus(
217 results,
218 source_path,
219 repo_revision,
220 trace_steps,
221 self.config.consensus_threshold,
222 )
223 }
224
225 pub fn batch_validate<'a>(
227 &self,
228 traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
229 ) -> BatchValidationStats {
230 self.batch_validate_with_options(traces, None, None)
231 }
232
233 pub fn batch_validate_with_options<'a>(
235 &self,
236 traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
237 repo_revision: Option<&str>,
238 trace_steps: Option<Vec<TraceStep>>,
239 ) -> BatchValidationStats {
240 let mut stats = BatchValidationStats::default();
241
242 for (result, source, source_path) in traces {
243 match self.validate(
244 &result,
245 source,
246 source_path,
247 repo_revision,
248 trace_steps.clone(),
249 ) {
250 OracleResult::Golden(trace) => stats.golden.push(trace),
251 OracleResult::Consensus { trace, .. } => stats.consensus.push(trace),
252 OracleResult::Unverified { reason, trace } => {
253 stats.unverified.push((trace, reason));
254 }
255 OracleResult::Failed { reason, trace, .. } => {
256 stats.failed.push((trace, reason));
257 }
258 }
259 }
260 stats
261 }
262}