use anyhow::Result;
use chrono::Utc;
use serde::{Deserialize, Serialize};
use std::time::Instant;
use super::schema::{AstResult, FinalPayload};
use super::{grep_oracle::GrepOracle, tree_sitter_oracle::TreeSitterOracle};
use crate::rlm::repl::RlmAnalysisResult;
/// Outcome of validating one analysis result against an oracle.
///
/// Every variant carries the [`ValidatedTrace`] that was built for the run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OracleResult {
/// The answer was confirmed by a deterministic oracle (grep or tree-sitter).
Golden(ValidatedTrace),
/// Several runs agreed on the same semantic answer.
Consensus {
trace: ValidatedTrace,
/// Share of runs that produced the winning answer.
agreement_ratio: f32,
},
/// The answer could not be checked; `reason` says why.
Unverified {
reason: String,
trace: ValidatedTrace,
},
/// The answer was checked and rejected, or its payload was malformed.
Failed {
reason: String,
/// Description of the discrepancy reported by the oracle, when there is one.
diff: Option<String>,
trace: ValidatedTrace,
},
}
/// Flat, serializable form of an [`OracleResult`]: a verdict label plus the trace.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OracleTraceRecord {
/// Verdict label: "golden", "consensus", "unverified" or "failed".
pub verdict: String,
/// Explanation for unverified/failed verdicts; omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// Agreement ratio for consensus verdicts; omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub agreement_ratio: Option<f32>,
/// The validated trace this record describes.
pub trace: ValidatedTrace,
}
impl OracleResult {
    /// Converts this result into a flat [`OracleTraceRecord`].
    ///
    /// The verdict label is derived from the variant. `reason` is filled for
    /// `Unverified` and `Failed`, `agreement_ratio` only for `Consensus`.
    /// The `diff` of a `Failed` result is not copied into the record.
    pub fn to_record(&self) -> OracleTraceRecord {
        let (label, reason, agreement_ratio, trace) = match self {
            Self::Golden(trace) => ("golden", None, None, trace),
            Self::Consensus {
                trace,
                agreement_ratio,
            } => ("consensus", None, Some(*agreement_ratio), trace),
            Self::Unverified { reason, trace } => {
                ("unverified", Some(reason.clone()), None, trace)
            }
            Self::Failed { reason, trace, .. } => ("failed", Some(reason.clone()), None, trace),
        };
        OracleTraceRecord {
            verdict: label.to_string(),
            reason,
            agreement_ratio,
            trace: trace.clone(),
        }
    }
}
/// One recorded step of an analysis run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TraceStep {
/// Iteration number this step belongs to.
pub iteration: usize,
/// The action taken in this step.
pub action: String,
/// The output recorded for the action.
pub output: String,
}
/// A trace annotated with the validation verdict and run metadata.
///
/// Fields marked `#[serde(skip)]` are kept in memory only: they are not
/// written when serializing and take their default value when deserializing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ValidatedTrace {
/// The query text (taken from the first sub-query, or "unknown query").
pub prompt: String,
/// Steps supplied by the caller; empty when none were given.
pub trace: Vec<TraceStep>,
/// Parsed final payload; omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub final_payload: Option<FinalPayload>,
/// Verdict label: "golden", "consensus", "unverified" or "failed".
pub verdict: String,
/// Oracle discrepancy description, set on failed verdicts; omitted from JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub oracle_diff: Option<String>,
/// Caller-supplied revision, else the git HEAD revision, else "unknown".
pub repo_revision: String,
/// Creation time in RFC 3339 format (UTC).
pub timestamp: String,
/// Raw answer text of the analysis result.
#[serde(skip)]
pub answer: String,
/// Iteration count reported by the analysis result.
#[serde(skip)]
pub iterations: usize,
/// Number of sub-queries in the analysis result.
#[serde(skip)]
pub subcalls: usize,
/// Input token count from the run statistics.
#[serde(skip)]
pub input_tokens: usize,
/// Output token count from the run statistics.
#[serde(skip)]
pub output_tokens: usize,
/// Elapsed time in milliseconds from the run statistics.
#[serde(skip)]
pub elapsed_ms: u64,
/// Path of the analysed source, when the caller provided one.
#[serde(skip)]
pub source_path: Option<String>,
/// Which mechanism produced the verdict.
#[serde(skip)]
pub verification_method: VerificationMethod,
/// Random UUID (v4) identifying this trace.
#[serde(skip)]
pub trace_id: String,
}
/// Mechanism that produced a trace's verdict.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum VerificationMethod {
/// Checked against grep results over the source.
GrepOracle,
/// Checked against tree-sitter extraction from the source.
TreeSitterOracle,
/// Accepted because multiple runs agreed.
Consensus,
/// No oracle was applied (the default).
#[default]
None,
}
/// Validates analysis results against deterministic oracles or run consensus.
pub struct TraceValidator {
// Minimum coverage (claimed / actual) for a subset match to count as golden.
confidence_threshold: f32,
// Minimum agreement ratio for a consensus verdict.
consensus_threshold: f32,
}
impl Default for TraceValidator {
    /// Builds a validator with a 0.95 confidence threshold and a 1.0
    /// (unanimous) consensus threshold.
    fn default() -> Self {
        let confidence_threshold = 0.95;
        let consensus_threshold = 1.0;
        Self {
            confidence_threshold,
            consensus_threshold,
        }
    }
}
impl TraceValidator {
pub fn new() -> Self {
Self::default()
}
pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
self.confidence_threshold = threshold.clamp(0.0, 1.0);
self
}
pub fn with_consensus_threshold(mut self, threshold: f32) -> Self {
self.consensus_threshold = threshold.clamp(0.0, 1.0);
self
}
pub fn validate(
&self,
result: &RlmAnalysisResult,
source: &str,
source_path: Option<&str>,
repo_revision: Option<&str>,
trace_steps: Option<Vec<TraceStep>>,
) -> OracleResult {
let _start = Instant::now();
let final_payload = FinalPayload::parse(&result.answer);
let query = result
.sub_queries
.first()
.map(|sq| sq.query.clone())
.unwrap_or_else(|| "unknown query".to_string());
let base_trace = || {
Self::build_base_trace(
result,
source_path,
repo_revision,
trace_steps.clone(),
final_payload.clone(),
)
};
match &final_payload {
FinalPayload::Grep(_) => {
self.validate_grep_payload(&final_payload, source, source_path, &query, base_trace)
}
FinalPayload::Ast(_) => {
self.validate_ast_payload(&final_payload, source, source_path, &query, base_trace)
}
FinalPayload::Semantic(_) => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
OracleResult::Unverified {
reason: "Semantic queries require LLM understanding - no deterministic oracle available".to_string(),
trace,
}
}
FinalPayload::Malformed { error, raw } => {
let trimmed = raw.trim_start();
if trimmed.starts_with('{') || trimmed.starts_with('[') {
let mut trace = base_trace();
trace.verdict = "failed".to_string();
OracleResult::Failed {
reason: format!("Malformed FINAL payload: {}", error),
diff: None,
trace,
}
} else {
match GrepOracle::classify_query(&query) {
super::QueryType::PatternMatch => {
let oracle = GrepOracle::new(source.to_string());
let verification = oracle.verify(&result.answer, &query);
self.oracle_result_from_grep_verification(verification, base_trace)
}
super::QueryType::Structural => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
OracleResult::Unverified {
reason: "Structured query result was not emitted as FINAL(JSON); deterministic AST verification requires typed payload".to_string(),
trace,
}
}
super::QueryType::Semantic => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
OracleResult::Unverified {
reason: "Semantic/free-form answer without FINAL(JSON) payload"
.to_string(),
trace,
}
}
}
}
}
}
}
/// Validates several runs of the same query by answer agreement.
///
/// * With no results, returns `Unverified` with a placeholder trace.
/// * If the first result is not a semantic payload, delegates to
///   [`Self::validate`] on that first result.
/// * Otherwise every run must be a semantic payload. Answers are compared
///   after whitespace canonicalization, and the most frequent answer wins.
///   Ties are broken deterministically in favour of the answer that
///   appeared first in `results`.
///
/// Returns `Consensus` when the winning share of runs reaches the consensus
/// threshold, and `Unverified` otherwise. The trace is always built from the
/// first result.
pub fn validate_with_consensus(
    &self,
    results: &[RlmAnalysisResult],
    source: &str,
    source_path: Option<&str>,
    repo_revision: Option<&str>,
    trace_steps: Option<Vec<TraceStep>>,
) -> OracleResult {
    let Some(first) = results.first() else {
        let trace = Self::build_placeholder_trace(source_path, repo_revision, None);
        return OracleResult::Unverified {
            reason: "Consensus validation requires at least one result".to_string(),
            trace,
        };
    };
    let first_payload = FinalPayload::parse(&first.answer);
    if !matches!(first_payload, FinalPayload::Semantic(_)) {
        return self.validate(first, source, source_path, repo_revision, trace_steps);
    }

    // canonical answer -> (first raw answer seen, occurrences, index of first occurrence)
    let mut tallies: std::collections::HashMap<String, (String, usize, usize)> =
        std::collections::HashMap::new();
    for (index, result) in results.iter().enumerate() {
        let semantic = match FinalPayload::parse(&result.answer) {
            FinalPayload::Semantic(p) => p.answer,
            _ => {
                // This path returns, so the owned values can be moved without cloning.
                let trace = Self::build_base_trace(
                    first,
                    source_path,
                    repo_revision,
                    trace_steps,
                    first_payload,
                );
                return OracleResult::Unverified {
                    reason: "Consensus validation requires semantic payloads for all runs"
                        .to_string(),
                    trace,
                };
            }
        };
        let canonical = canonicalize_semantic_answer(&semantic);
        let entry = tallies.entry(canonical).or_insert((semantic, 0, index));
        entry.1 += 1;
    }

    // HashMap iteration order is arbitrary, so the count alone would pick a
    // random winner on ties. The first-occurrence index makes the key unique.
    let winner = tallies
        .into_iter()
        .max_by_key(|(_, (_, count, first_seen))| (*count, std::cmp::Reverse(*first_seen)));
    let Some((canonical, (winning_answer, winning_count, _))) = winner else {
        let trace = Self::build_base_trace(
            first,
            source_path,
            repo_revision,
            trace_steps,
            first_payload,
        );
        return OracleResult::Unverified {
            reason: "Consensus validation could not determine a winning answer".to_string(),
            trace,
        };
    };

    let agreement_ratio = winning_count as f32 / results.len() as f32;
    if agreement_ratio < self.consensus_threshold {
        let mut trace = Self::build_base_trace(
            first,
            source_path,
            repo_revision,
            trace_steps,
            first_payload,
        );
        trace.verdict = "unverified".to_string();
        return OracleResult::Unverified {
            reason: format!(
                "Consensus not reached: best agreement {:.1}% for answer '{}'",
                agreement_ratio * 100.0,
                canonical
            ),
            trace,
        };
    }

    // The stored payload carries the winning answer rather than the first run's.
    let mut trace = Self::build_base_trace(
        first,
        source_path,
        repo_revision,
        trace_steps,
        FinalPayload::Semantic(super::schema::SemanticPayload {
            file: source_path.unwrap_or("unknown").to_string(),
            answer: winning_answer,
        }),
    );
    trace.verdict = "consensus".to_string();
    trace.verification_method = VerificationMethod::Consensus;
    OracleResult::Consensus {
        trace,
        agreement_ratio,
    }
}
/// Builds an "unverified" trace pre-filled from `result`.
///
/// The revision is `repo_revision` when given, otherwise the git HEAD
/// revision, otherwise "unknown". The prompt is the first sub-query's text,
/// or "unknown query" when there are no sub-queries. Callers overwrite
/// `verdict`, `oracle_diff` and `verification_method` as needed.
fn build_base_trace(
    result: &RlmAnalysisResult,
    source_path: Option<&str>,
    repo_revision: Option<&str>,
    trace_steps: Option<Vec<TraceStep>>,
    final_payload: FinalPayload,
) -> ValidatedTrace {
    let repo_revision = match repo_revision {
        Some(revision) => revision.to_string(),
        None => Self::get_git_revision().unwrap_or_else(|_| "unknown".to_string()),
    };
    let prompt = match result.sub_queries.first() {
        Some(sub_query) => sub_query.query.clone(),
        None => "unknown query".to_string(),
    };
    let stats = &result.stats;
    ValidatedTrace {
        prompt,
        trace: trace_steps.unwrap_or_default(),
        final_payload: Some(final_payload),
        verdict: "unverified".to_string(),
        oracle_diff: None,
        repo_revision,
        timestamp: Utc::now().to_rfc3339(),
        answer: result.answer.clone(),
        iterations: result.iterations,
        subcalls: result.sub_queries.len(),
        input_tokens: stats.input_tokens,
        output_tokens: stats.output_tokens,
        elapsed_ms: stats.elapsed_ms,
        source_path: source_path.map(str::to_string),
        verification_method: VerificationMethod::None,
        trace_id: uuid::Uuid::new_v4().to_string(),
    }
}
/// Builds an "unverified" trace when there is no analysis result to draw from.
///
/// All run statistics are zero, the answer is empty and the prompt is
/// "unknown query". The revision is resolved the same way as for regular
/// traces: explicit value, then git HEAD, then "unknown".
fn build_placeholder_trace(
    source_path: Option<&str>,
    repo_revision: Option<&str>,
    final_payload: Option<FinalPayload>,
) -> ValidatedTrace {
    let repo_revision = match repo_revision {
        Some(revision) => revision.to_string(),
        None => Self::get_git_revision().unwrap_or_else(|_| "unknown".to_string()),
    };
    ValidatedTrace {
        prompt: "unknown query".to_string(),
        trace: Vec::new(),
        final_payload,
        verdict: "unverified".to_string(),
        oracle_diff: None,
        repo_revision,
        timestamp: Utc::now().to_rfc3339(),
        answer: String::new(),
        iterations: 0,
        subcalls: 0,
        input_tokens: 0,
        output_tokens: 0,
        elapsed_ms: 0,
        source_path: source_path.map(str::to_string),
        verification_method: VerificationMethod::None,
        trace_id: uuid::Uuid::new_v4().to_string(),
    }
}
/// Checks a grep payload's claimed matches against a grep over `source`.
///
/// Returns `Unverified` when the pattern cannot be run; otherwise the
/// verdict is derived from comparing claimed and actual matches.
/// `payload` must be the `Grep` variant.
fn validate_grep_payload(
    &self,
    payload: &FinalPayload,
    source: &str,
    _source_path: Option<&str>,
    _query: &str,
    base_trace: impl FnOnce() -> ValidatedTrace,
) -> OracleResult {
    let FinalPayload::Grep(grep_payload) = payload else {
        unreachable!()
    };
    let oracle = GrepOracle::new(source.to_string());
    let ground_truth = match oracle.grep(&grep_payload.pattern) {
        Ok(found) => found,
        Err(err) => {
            let mut trace = base_trace();
            trace.verdict = "unverified".to_string();
            return OracleResult::Unverified {
                reason: format!("Could not run grep: {}", err),
                trace,
            };
        }
    };

    // Flatten the claimed matches into (line, text) pairs for the oracle.
    let mut claimed: Vec<(usize, String)> = Vec::new();
    for claim in grep_payload.matches.iter() {
        claimed.push((claim.line, claim.text.clone()));
    }
    let verification = oracle.verify_matches(&claimed, &ground_truth);
    self.oracle_result_from_grep_verification(verification, base_trace)
}
fn oracle_result_from_grep_verification(
&self,
verification: super::grep_oracle::GrepVerification,
base_trace: impl FnOnce() -> ValidatedTrace,
) -> OracleResult {
match verification {
super::grep_oracle::GrepVerification::ExactMatch
| super::grep_oracle::GrepVerification::UnorderedMatch => {
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "golden".to_string();
OracleResult::Golden(trace)
}
super::grep_oracle::GrepVerification::SubsetMatch { claimed, actual } => {
let coverage = claimed as f32 / actual.max(1) as f32;
if coverage >= self.confidence_threshold {
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "golden".to_string();
OracleResult::Golden(trace)
} else {
let diff = format!(
"Subset match: model claimed {} but source has {} (coverage: {:.1}%)",
claimed,
actual,
coverage * 100.0
);
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
}
super::grep_oracle::GrepVerification::HasFalsePositives { false_positives } => {
let diff = format!(
"False positives: {} claims not found in source: {:?}",
false_positives.len(),
false_positives
);
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
super::grep_oracle::GrepVerification::HasFalseNegatives { false_negatives } => {
let diff = format!(
"False negatives: {} items in source not claimed: {:?}",
false_negatives.len(),
false_negatives
);
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
super::grep_oracle::GrepVerification::Mismatch => {
let diff = "Complete mismatch between claimed and actual matches".to_string();
let mut trace = base_trace();
trace.verification_method = VerificationMethod::GrepOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
super::grep_oracle::GrepVerification::CannotVerify { reason } => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
OracleResult::Unverified { reason, trace }
}
}
}
fn validate_ast_payload(
&self,
payload: &FinalPayload,
source: &str,
_source_path: Option<&str>,
_query: &str,
base_trace: impl FnOnce() -> ValidatedTrace,
) -> OracleResult {
let ast_payload = match payload {
FinalPayload::Ast(p) => p,
_ => unreachable!(),
};
let mut oracle = TreeSitterOracle::new(source.to_string());
let actual_results: Vec<AstResult> = match ast_payload.query.as_str() {
"functions" => match oracle.get_functions() {
Ok(funcs) => funcs
.iter()
.map(|f| AstResult {
name: f.name.clone(),
args: parse_params(&f.params),
return_type: f
.return_type
.as_deref()
.map(|r| r.trim().trim_start_matches("->").trim().to_string()),
span: Some((f.line, f.line)),
})
.collect(),
Err(e) => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
return OracleResult::Unverified {
reason: format!("Failed to parse AST: {}", e),
trace,
};
}
},
"structs" => match oracle.get_structs() {
Ok(structs) => structs
.iter()
.map(|s| AstResult {
name: s.name.clone(),
args: s.fields.clone(),
return_type: None,
span: Some((s.line, s.line)),
})
.collect(),
Err(e) => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
return OracleResult::Unverified {
reason: format!("Failed to parse AST: {}", e),
trace,
};
}
},
"enums" => match oracle.get_enums() {
Ok(enums) => enums
.iter()
.map(|e| AstResult {
name: e.name.clone(),
args: e.variants.clone(),
return_type: None,
span: Some((e.line, e.line)),
})
.collect(),
Err(e) => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
return OracleResult::Unverified {
reason: format!("Failed to parse AST: {}", e),
trace,
};
}
},
"impls" => match oracle.get_impls() {
Ok(impls) => impls
.iter()
.map(|i| AstResult {
name: i.type_name.clone(),
args: i.trait_name.clone().map(|t| vec![t]).unwrap_or_default(),
return_type: Some(format!("methods:{}", i.method_count)),
span: Some((i.line, i.line)),
})
.collect(),
Err(e) => {
let mut trace = base_trace();
trace.verdict = "unverified".to_string();
return OracleResult::Unverified {
reason: format!("Failed to parse AST: {}", e),
trace,
};
}
},
_ => {
match oracle.get_functions() {
Ok(funcs) => funcs
.iter()
.map(|f| AstResult {
name: f.name.clone(),
args: parse_params(&f.params),
return_type: f
.return_type
.as_deref()
.map(|r| r.trim().trim_start_matches("->").trim().to_string()),
span: Some((f.line, f.line)),
})
.collect(),
Err(_) => vec![],
}
}
};
let claimed: std::collections::HashSet<_> = ast_payload
.results
.iter()
.map(normalize_ast_result)
.collect();
let actual: std::collections::HashSet<_> =
actual_results.iter().map(normalize_ast_result).collect();
if claimed == actual {
let mut trace = base_trace();
trace.verification_method = VerificationMethod::TreeSitterOracle;
trace.verdict = "golden".to_string();
OracleResult::Golden(trace)
} else if claimed.is_subset(&actual) {
let coverage = claimed.len() as f32 / actual.len().max(1) as f32;
if coverage >= self.confidence_threshold {
let mut trace = base_trace();
trace.verification_method = VerificationMethod::TreeSitterOracle;
trace.verdict = "golden".to_string();
OracleResult::Golden(trace)
} else {
let diff = format!(
"Partial structural match. Claimed {} entries, actual {} entries.",
claimed.len(),
actual.len()
);
let mut trace = base_trace();
trace.verification_method = VerificationMethod::TreeSitterOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
} else {
let extra: Vec<_> = claimed.difference(&actual).cloned().collect();
let missing: Vec<_> = actual.difference(&claimed).cloned().collect();
let diff = format!(
"Structural mismatch. extra={} missing={} extra_items={:?} missing_items={:?}",
extra.len(),
missing.len(),
extra,
missing
);
let mut trace = base_trace();
trace.verification_method = VerificationMethod::TreeSitterOracle;
trace.verdict = "failed".to_string();
trace.oracle_diff = Some(diff.clone());
OracleResult::Failed {
reason: diff.clone(),
diff: Some(diff),
trace,
}
}
}
/// Returns the current git HEAD revision of the working directory.
///
/// # Errors
///
/// Fails when `git` cannot be spawned, when `git rev-parse HEAD` exits
/// unsuccessfully (for example outside a repository), or when it prints
/// nothing. Returning an error in those cases lets callers fall back to
/// their "unknown" placeholder instead of recording an empty revision.
fn get_git_revision() -> Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()?;
    if !output.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("git rev-parse HEAD failed with {}", output.status),
        )
        .into());
    }
    let revision = String::from_utf8_lossy(&output.stdout).trim().to_string();
    if revision.is_empty() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "git rev-parse HEAD produced no output",
        )
        .into());
    }
    Ok(revision)
}
/// Validates every `(result, source, source_path)` item and groups the
/// traces by verdict.
///
/// Equivalent to [`Self::batch_validate_with_options`] with no explicit
/// revision and no trace steps.
pub fn batch_validate<'a>(
    &self,
    traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
) -> BatchValidationStats {
    let (repo_revision, trace_steps) = (None, None);
    self.batch_validate_with_options(traces, repo_revision, trace_steps)
}
/// Validates every `(result, source, source_path)` item and groups the
/// traces by verdict.
///
/// `repo_revision` and a copy of `trace_steps` are applied to every item.
/// Unverified and failed traces are stored together with their reason.
pub fn batch_validate_with_options<'a>(
    &self,
    traces: impl IntoIterator<Item = (RlmAnalysisResult, &'a str, Option<&'a str>)>,
    repo_revision: Option<&str>,
    trace_steps: Option<Vec<TraceStep>>,
) -> BatchValidationStats {
    let mut stats = BatchValidationStats::default();
    for (result, source, source_path) in traces {
        let outcome = self.validate(
            &result,
            source,
            source_path,
            repo_revision,
            trace_steps.clone(),
        );
        match outcome {
            OracleResult::Golden(trace) => stats.golden.push(trace),
            OracleResult::Consensus { trace, .. } => stats.consensus.push(trace),
            OracleResult::Unverified { reason, trace } => stats.unverified.push((trace, reason)),
            OracleResult::Failed { reason, trace, .. } => stats.failed.push((trace, reason)),
        }
    }
    stats
}
}
/// Traces from a batch validation run, grouped by verdict.
#[derive(Debug, Clone, Default)]
pub struct BatchValidationStats {
/// Traces confirmed by a deterministic oracle.
pub golden: Vec<ValidatedTrace>,
/// Traces accepted by consensus.
pub consensus: Vec<ValidatedTrace>,
/// Traces that could not be verified, each paired with the reason.
pub unverified: Vec<(ValidatedTrace, String)>,
/// Traces that failed validation, each paired with the reason.
pub failed: Vec<(ValidatedTrace, String)>,
}
impl BatchValidationStats {
pub fn total(&self) -> usize {
self.golden.len() + self.consensus.len() + self.unverified.len() + self.failed.len()
}
pub fn golden_rate(&self) -> f32 {
let total = self.total();
if total == 0 {
0.0
} else {
self.golden.len() as f32 / total as f32
}
}
pub fn write_jsonl(&self, path: &str) -> Result<usize> {
use std::fs::File;
use std::io::{BufWriter, Write};
let file = File::create(path)?;
let mut writer = BufWriter::new(file);
let mut count = 0;
for trace in &self.golden {
let json = serde_json::to_string(trace)?;
writeln!(writer, "{}", json)?;
count += 1;
}
writer.flush()?;
Ok(count)
}
pub fn write_jsonl_split(&self, out_dir: &str, prefix: &str) -> Result<SplitWriteStats> {
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
let dir = Path::new(out_dir);
std::fs::create_dir_all(dir)?;
let golden_path = dir.join(format!("{prefix}.golden.jsonl"));
let consensus_path = dir.join(format!("{prefix}.consensus.jsonl"));
let failed_path = dir.join(format!("{prefix}.failed.jsonl"));
let unverified_path = dir.join(format!("{prefix}.unverified.jsonl"));
let mut golden_writer = BufWriter::new(File::create(&golden_path)?);
let mut consensus_writer = BufWriter::new(File::create(&consensus_path)?);
let mut failed_writer = BufWriter::new(File::create(&failed_path)?);
let mut unverified_writer = BufWriter::new(File::create(&unverified_path)?);
for trace in &self.golden {
let rec = OracleTraceRecord {
verdict: "golden".to_string(),
reason: None,
agreement_ratio: None,
trace: trace.clone(),
};
writeln!(golden_writer, "{}", serde_json::to_string(&rec)?)?;
}
for trace in &self.consensus {
let rec = OracleTraceRecord {
verdict: "consensus".to_string(),
reason: None,
agreement_ratio: None,
trace: trace.clone(),
};
writeln!(consensus_writer, "{}", serde_json::to_string(&rec)?)?;
}
for (trace, reason) in &self.failed {
let rec = OracleTraceRecord {
verdict: "failed".to_string(),
reason: Some(reason.clone()),
agreement_ratio: None,
trace: trace.clone(),
};
writeln!(failed_writer, "{}", serde_json::to_string(&rec)?)?;
}
for (trace, reason) in &self.unverified {
let rec = OracleTraceRecord {
verdict: "unverified".to_string(),
reason: Some(reason.clone()),
agreement_ratio: None,
trace: trace.clone(),
};
writeln!(unverified_writer, "{}", serde_json::to_string(&rec)?)?;
}
golden_writer.flush()?;
consensus_writer.flush()?;
failed_writer.flush()?;
unverified_writer.flush()?;
Ok(SplitWriteStats {
golden_path: golden_path.to_string_lossy().to_string(),
consensus_path: consensus_path.to_string_lossy().to_string(),
failed_path: failed_path.to_string_lossy().to_string(),
unverified_path: unverified_path.to_string_lossy().to_string(),
golden_count: self.golden.len(),
consensus_count: self.consensus.len(),
failed_count: self.failed.len(),
unverified_count: self.unverified.len(),
})
}
}
/// Paths and record counts of the files produced by a split JSONL write.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SplitWriteStats {
/// Path of the golden JSONL file.
pub golden_path: String,
/// Path of the consensus JSONL file.
pub consensus_path: String,
/// Path of the failed JSONL file.
pub failed_path: String,
/// Path of the unverified JSONL file.
pub unverified_path: String,
/// Number of golden records.
pub golden_count: usize,
/// Number of consensus records.
pub consensus_count: usize,
/// Number of failed records.
pub failed_count: usize,
/// Number of unverified records.
pub unverified_count: usize,
}
/// Returns `s` with every whitespace character removed.
fn normalize_type_text(s: &str) -> String {
    let mut compact = s.to_owned();
    compact.retain(|ch| !ch.is_whitespace());
    compact
}
/// Splits a parameter list such as `"(a: i32, b: &str)"` into trimmed entries.
///
/// Only the single pair of parentheses enclosing the list is removed, so a
/// last parameter whose type itself ends in `)` (for example `fn(i32)` or
/// `()`) is kept intact. The closing parenthesis is stripped only when an
/// opening one was present. Empty entries are dropped, so `"()"` and `""`
/// both yield an empty vector.
///
/// The split is on every comma, so a type containing commas (a tuple or a
/// multi-argument generic) is spread over several entries.
fn parse_params(params: &str) -> Vec<String> {
    let trimmed = params.trim();
    let inner = match trimmed.strip_prefix('(') {
        Some(rest) => rest.strip_suffix(')').unwrap_or(rest),
        None => trimmed,
    };
    inner
        .split(',')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .map(ToString::to_string)
        .collect()
}
/// Collapses every run of whitespace in `answer` into a single space and
/// drops leading and trailing whitespace.
fn canonicalize_semantic_answer(answer: &str) -> String {
    let mut canonical = String::with_capacity(answer.len());
    for word in answer.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !canonical.is_empty() {
            canonical.push(' ');
        }
        canonical.push_str(word);
    }
    canonical
}
fn normalize_ast_result(result: &AstResult) -> String {
let args = if result.args.is_empty() {
String::new()
} else {
result.args.join(",").replace(' ', "")
};
let return_type = result
.return_type
.as_deref()
.map(normalize_type_text)
.unwrap_or_default();
let span = result
.span
.map(|(s, e)| format!("{s}:{e}"))
.unwrap_or_default();
format!("{}|{}|{}|{}", result.name.trim(), args, return_type, span)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::rlm::{RlmStats, SubQuery};
// Builds an analysis result fixture with one sub-query carrying `query`
// and `answer`, plus fixed run statistics.
fn make_result(answer: &str, query: &str) -> RlmAnalysisResult {
RlmAnalysisResult {
answer: answer.to_string(),
iterations: 2,
sub_queries: vec![SubQuery {
query: query.to_string(),
context_slice: None,
response: answer.to_string(),
tokens_used: 0,
}],
stats: RlmStats {
input_tokens: 100,
output_tokens: 50,
iterations: 2,
subcalls: 0,
elapsed_ms: 500,
compression_ratio: 1.0,
},
}
}
// Rust source fixture shared by the tests: two async functions and one struct.
fn sample_rust_code() -> &'static str {
r#"
pub async fn process(input: &str) -> Result<String> {
let data = parse(input)?;
Ok(data)
}
async fn parse(input: &str) -> Result<String> {
Ok(input.to_uppercase())
}
pub struct Config {
pub debug: bool,
}
"#
}
// A grep payload listing both async functions is expected to be golden via the grep oracle.
#[test]
fn validate_grep_match() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let result = make_result(
r#"{"kind": "grep", "file": "test.rs", "pattern": "async fn", "matches": [{"line": 2, "text": "pub async fn process(input: &str) -> Result<String> {"}, {"line": 7, "text": "async fn parse(input: &str) -> Result<String> {"}]}"#,
"Find all async functions",
);
match validator.validate(&result, source, Some("test.rs"), Some("abc123"), None) {
OracleResult::Golden(trace) => {
assert_eq!(trace.verification_method, VerificationMethod::GrepOracle);
assert_eq!(trace.verdict, "golden");
}
OracleResult::Consensus { .. } => panic!("Expected deterministic golden"),
OracleResult::Unverified { .. } => panic!("Expected golden"),
OracleResult::Failed { .. } => panic!("Expected golden"),
}
}
// A single semantic payload has no deterministic oracle, so it must be unverified.
#[test]
fn validate_semantic_unverified() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let result = make_result(
r#"{"kind": "semantic", "file": "test.rs", "answer": "This function processes input by parsing it and returning uppercase"}"#,
"Explain what the process function does",
);
match validator.validate(&result, source, Some("test.rs"), Some("abc123"), None) {
OracleResult::Unverified { reason, .. } => {
assert!(reason.contains("Semantic"));
}
OracleResult::Golden(_) => panic!("Expected unverified"),
OracleResult::Consensus { .. } => panic!("Expected unverified"),
OracleResult::Failed { .. } => panic!("Expected unverified"),
}
}
// Three identical semantic answers must yield a consensus verdict with full agreement.
#[test]
fn validate_semantic_consensus() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let results = vec![
make_result(
r#"{"kind":"semantic","file":"x.rs","answer":"Auth middleware is mandatory for all routes except /health."}"#,
"Explain auth",
),
make_result(
r#"{"kind":"semantic","file":"x.rs","answer":"Auth middleware is mandatory for all routes except /health."}"#,
"Explain auth",
),
make_result(
r#"{"kind":"semantic","file":"x.rs","answer":"Auth middleware is mandatory for all routes except /health."}"#,
"Explain auth",
),
];
match validator.validate_with_consensus(
&results,
source,
Some("x.rs"),
Some("abc123"),
None,
) {
OracleResult::Consensus {
trace,
agreement_ratio,
} => {
assert_eq!(trace.verification_method, VerificationMethod::Consensus);
assert_eq!(trace.verdict, "consensus");
assert_eq!(agreement_ratio, 1.0);
}
_ => panic!("Expected consensus verification"),
}
}
// A plain-text answer to a pattern query is expected to be verified through the grep fallback.
#[test]
fn validate_plain_text_pattern_query_without_json_is_verified() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let result = make_result(
"There are 2 occurrences of async functions in test.rs",
"Find all async functions",
);
match validator.validate(&result, source, Some("test.rs"), Some("abc123"), None) {
OracleResult::Golden(trace) => {
assert_eq!(trace.verification_method, VerificationMethod::GrepOracle);
assert_eq!(trace.verdict, "golden");
}
other => panic!(
"Expected golden from plaintext grep fallback, got {:?}",
other
),
}
}
// Text that starts like JSON but does not parse must fail rather than fall back to grep.
#[test]
fn validate_json_like_malformed_payload_stays_failed() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let result = make_result("{ this is not valid json", "Find all async functions");
match validator.validate(&result, source, Some("test.rs"), Some("abc123"), None) {
OracleResult::Failed { reason, .. } => {
assert!(reason.contains("Malformed FINAL payload"));
}
other => panic!(
"Expected failed verdict for malformed JSON-like payload, got {:?}",
other
),
}
}
// A batch with one grep payload and one semantic payload must fill both the
// golden and unverified buckets and account for every item.
#[test]
fn batch_validate_mixed() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let traces = vec![
(
make_result(
r#"{"kind": "grep", "file": "x.rs", "pattern": "async fn", "matches": [{"line": 2, "text": "pub async fn process(input: &str) -> Result<String> {"}, {"line": 7, "text": "async fn parse(input: &str) -> Result<String> {"}]}"#,
"Find async",
),
source,
None,
),
(
make_result(
r#"{"kind": "semantic", "file": "x.rs", "answer": "text"}"#,
"Explain",
),
source,
None,
),
];
let stats = validator.batch_validate(traces);
assert!(stats.golden.len() >= 1);
assert!(stats.unverified.len() >= 1);
assert!(stats.total() == 2);
}
// The split writer must create all four bucket files and report counts that
// match the in-memory stats.
#[test]
fn write_jsonl_split_creates_all_buckets() {
let validator = TraceValidator::new();
let source = sample_rust_code();
let traces = vec![
(
make_result(
r#"{"kind":"grep","file":"x.rs","pattern":"async fn","matches":[{"line":2,"text":"pub async fn process(input: &str) -> Result<String> {"},{"line":7,"text":"async fn parse(input: &str) -> Result<String> {"}]}"#,
"Find async",
),
source,
None,
),
(
make_result(
r#"{"kind":"semantic","file":"x.rs","answer":"This is semantic"}"#,
"Explain",
),
source,
None,
),
];
let stats = validator.batch_validate(traces);
let tmp = tempfile::tempdir().expect("tempdir");
let split = stats
.write_jsonl_split(tmp.path().to_str().unwrap_or("."), "oracle")
.expect("write split jsonl");
assert_eq!(split.golden_count, stats.golden.len());
assert_eq!(split.consensus_count, stats.consensus.len());
assert_eq!(split.unverified_count, stats.unverified.len());
assert_eq!(split.failed_count, stats.failed.len());
assert!(std::path::Path::new(&split.golden_path).exists());
assert!(std::path::Path::new(&split.consensus_path).exists());
assert!(std::path::Path::new(&split.failed_path).exists());
assert!(std::path::Path::new(&split.unverified_path).exists());
}
}